diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 36e10e4df4c1..44743fa0206f 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1,11869 +1,11869 @@
//===---- TargetInfo.cpp - Encapsulate target details -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These classes wrap the information about a call or function
// definition used to handle ABI compliance.
//
//===----------------------------------------------------------------------===//
#include "TargetInfo.h"
#include "ABIInfo.h"
#include "CGBlocks.h"
#include "CGCXXABI.h"
#include "CGValue.h"
#include "CodeGenFunction.h"
#include "clang/AST/Attr.h"
#include "clang/AST/RecordLayout.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/CodeGenOptions.h"
#include "clang/Basic/DiagnosticFrontend.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "clang/CodeGen/SwiftCallingConv.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace clang;
using namespace CodeGen;
// Helper for coercing an aggregate argument or return value into an integer
// array of the same size (including padding) and alignment. This alternate
// coercion happens only for the RenderScript ABI and can be removed after
// runtimes that rely on it are no longer supported.
//
// RenderScript assumes that the size of the argument / return value in the IR
// is the same as the size of the corresponding qualified type. This helper
// coerces the aggregate type into an array of the same size (including
// padding). This coercion is used in lieu of expansion of struct members or
// other canonical coercions that return a coerced-type of larger size.
//
// Ty - The argument / return value type
// Context - The associated ASTContext
// LLVMContext - The associated LLVMContext
static ABIArgInfo coerceToIntArray(QualType Ty,
ASTContext &Context,
llvm::LLVMContext &LLVMContext) {
// Alignment and Size are measured in bits.
const uint64_t Size = Context.getTypeSize(Ty);
const uint64_t Alignment = Context.getTypeAlign(Ty);
llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment);
const uint64_t NumElements = (Size + Alignment - 1) / Alignment;
return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}
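// Illustrative example (not from the original source): a type with
// Size = 64 bits and Alignment = 32 bits yields
// NumElements = (64 + 31) / 32 = 2, so it is coerced to [2 x i32].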
static void AssignToArrayRange(CodeGen::CGBuilderTy &Builder,
llvm::Value *Array,
llvm::Value *Value,
unsigned FirstIndex,
unsigned LastIndex) {
// Alternatively, we could emit this as a loop in the source.
for (unsigned I = FirstIndex; I <= LastIndex; ++I) {
llvm::Value *Cell =
Builder.CreateConstInBoundsGEP1_32(Builder.getInt8Ty(), Array, I);
Builder.CreateAlignedStore(Value, Cell, CharUnits::One());
}
}
static bool isAggregateTypeForABI(QualType T) {
return !CodeGenFunction::hasScalarEvaluationKind(T) ||
T->isMemberFunctionPointerType();
}
ABIArgInfo ABIInfo::getNaturalAlignIndirect(QualType Ty, bool ByVal,
bool Realign,
llvm::Type *Padding) const {
return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty), ByVal,
Realign, Padding);
}
ABIArgInfo
ABIInfo::getNaturalAlignIndirectInReg(QualType Ty, bool Realign) const {
return ABIArgInfo::getIndirectInReg(getContext().getTypeAlignInChars(Ty),
/*ByVal*/ false, Realign);
}
Address ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
return Address::invalid();
}
static llvm::Type *getVAListElementType(CodeGenFunction &CGF) {
return CGF.ConvertTypeForMem(
CGF.getContext().getBuiltinVaListType()->getPointeeType());
}
bool ABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
if (Ty->isPromotableIntegerType())
return true;
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < getContext().getTypeSize(getContext().IntTy))
return true;
return false;
}
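// Illustrative note (not from the source): with a 32-bit 'int', plain
// 'char' and 'short' are promotable, and so is '_BitInt(17)' (17 < 32),
// while '_BitInt(32)' and wider bit-precise integers are not.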
ABIInfo::~ABIInfo() {}
/// Does the given lowering require more than the given number of
/// registers when expanded?
///
/// This is intended to be the basis of a reasonable basic implementation
/// of should{Pass,Return}IndirectlyForSwift.
///
/// For most targets, a limit of four total registers is reasonable; this
/// limits the amount of code required in order to move around the value
/// in case it wasn't produced immediately prior to the call by the caller
/// (or wasn't produced in exactly the right registers) or isn't used
/// immediately within the callee. But some targets may need to further
/// limit the register count due to an inability to support that many
/// return registers.
static bool occupiesMoreThan(CodeGenTypes &cgt,
ArrayRef<llvm::Type*> scalarTypes,
unsigned maxAllRegisters) {
unsigned intCount = 0, fpCount = 0;
for (llvm::Type *type : scalarTypes) {
if (type->isPointerTy()) {
intCount++;
} else if (auto intTy = dyn_cast<llvm::IntegerType>(type)) {
auto ptrWidth = cgt.getTarget().getPointerWidth(0);
intCount += (intTy->getBitWidth() + ptrWidth - 1) / ptrWidth;
} else {
assert(type->isVectorTy() || type->isFloatingPointTy());
fpCount++;
}
}
return (intCount + fpCount > maxAllRegisters);
}
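// Illustrative example (assumption): with 64-bit pointers, the scalar list
// { i8*, i128, double } counts intCount = 1 + 2 = 3 and fpCount = 1; the
// total of 4 does not exceed a maxAllRegisters of 4, so the value can still
// be passed in registers.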
bool SwiftABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
llvm::Type *eltTy,
unsigned numElts) const {
// The default implementation of this assumes that the target guarantees
// 128-bit SIMD support but nothing more.
return (vectorSize.getQuantity() > 8 && vectorSize.getQuantity() <= 16);
}
static CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT,
CGCXXABI &CXXABI) {
const CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(RT->getDecl());
if (!RD) {
if (!RT->getDecl()->canPassInRegisters())
return CGCXXABI::RAA_Indirect;
return CGCXXABI::RAA_Default;
}
return CXXABI.getRecordArgABI(RD);
}
static CGCXXABI::RecordArgABI getRecordArgABI(QualType T,
CGCXXABI &CXXABI) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return CGCXXABI::RAA_Default;
return getRecordArgABI(RT, CXXABI);
}
static bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI,
const ABIInfo &Info) {
QualType Ty = FI.getReturnType();
if (const auto *RT = Ty->getAs<RecordType>())
if (!isa<CXXRecordDecl>(RT->getDecl()) &&
!RT->getDecl()->canPassInRegisters()) {
FI.getReturnInfo() = Info.getNaturalAlignIndirect(Ty);
return true;
}
return CXXABI.classifyReturnType(FI);
}
/// Pass transparent unions as if they were the type of the first element. Sema
/// should ensure that all elements of the union have the same "machine type".
static QualType useFirstFieldIfTransparentUnion(QualType Ty) {
if (const RecordType *UT = Ty->getAsUnionType()) {
const RecordDecl *UD = UT->getDecl();
if (UD->hasAttr<TransparentUnionAttr>()) {
assert(!UD->field_empty() && "sema created an empty transparent union");
return UD->field_begin()->getType();
}
}
return Ty;
}
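// For illustration (not from the source): given
//   typedef union { int *p; long l; } __attribute__((transparent_union)) U;
// an argument of type U is classified as if it were 'int *', its first
// field.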
CGCXXABI &ABIInfo::getCXXABI() const {
return CGT.getCXXABI();
}
ASTContext &ABIInfo::getContext() const {
return CGT.getContext();
}
llvm::LLVMContext &ABIInfo::getVMContext() const {
return CGT.getLLVMContext();
}
const llvm::DataLayout &ABIInfo::getDataLayout() const {
return CGT.getDataLayout();
}
const TargetInfo &ABIInfo::getTarget() const {
return CGT.getTarget();
}
const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
return CGT.getCodeGenOpts();
}
bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return false;
}
bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const {
return false;
}
bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
// For compatibility with GCC, ignore empty bitfields in C++ mode.
return getContext().getLangOpts().CPlusPlus;
}
LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
raw_ostream &OS = llvm::errs();
OS << "(ABIArgInfo Kind=";
switch (TheKind) {
case Direct:
OS << "Direct Type=";
if (llvm::Type *Ty = getCoerceToType())
Ty->print(OS);
else
OS << "null";
break;
case Extend:
OS << "Extend";
break;
case Ignore:
OS << "Ignore";
break;
case InAlloca:
OS << "InAlloca Offset=" << getInAllocaFieldIndex();
break;
case Indirect:
OS << "Indirect Align=" << getIndirectAlign().getQuantity()
<< " ByVal=" << getIndirectByVal()
<< " Realign=" << getIndirectRealign();
break;
case IndirectAliased:
OS << "Indirect Align=" << getIndirectAlign().getQuantity()
<< " AadrSpace=" << getIndirectAddrSpace()
<< " Realign=" << getIndirectRealign();
break;
case Expand:
OS << "Expand";
break;
case CoerceAndExpand:
OS << "CoerceAndExpand Type=";
getCoerceAndExpandType()->print(OS);
break;
}
OS << ")\n";
}
// Dynamically round a pointer up to a multiple of the given alignment.
static llvm::Value *emitRoundPointerUpToAlignment(CodeGenFunction &CGF,
llvm::Value *Ptr,
CharUnits Align) {
llvm::Value *PtrAsInt = Ptr;
// OverflowArgArea = (OverflowArgArea + Align - 1) & -Align;
PtrAsInt = CGF.Builder.CreatePtrToInt(PtrAsInt, CGF.IntPtrTy);
PtrAsInt = CGF.Builder.CreateAdd(PtrAsInt,
llvm::ConstantInt::get(CGF.IntPtrTy, Align.getQuantity() - 1));
PtrAsInt = CGF.Builder.CreateAnd(PtrAsInt,
llvm::ConstantInt::get(CGF.IntPtrTy, -Align.getQuantity()));
PtrAsInt = CGF.Builder.CreateIntToPtr(PtrAsInt,
Ptr->getType(),
Ptr->getName() + ".aligned");
return PtrAsInt;
}
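// Worked example (illustrative): aligning the pointer value 0x1004 to
// 16 bytes computes (0x1004 + 15) & -16 == 0x1010.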
/// Emit va_arg for a platform using the common void* representation,
/// where arguments are simply emitted in an array of slots on the stack.
///
/// This version implements the core direct-value passing rules.
///
/// \param SlotSize - The size and alignment of a stack slot.
/// Each argument will be allocated to a multiple of this number of
/// slots, and all the slots will be aligned to this value.
/// \param AllowHigherAlign - The slot alignment is not a cap;
/// an argument type with an alignment greater than the slot size
/// will be emitted on a higher-alignment address, potentially
/// leaving one or more empty slots behind as padding. If this
/// is false, the returned address might be less-aligned than
/// DirectAlign.
static Address emitVoidPtrDirectVAArg(CodeGenFunction &CGF,
Address VAListAddr,
llvm::Type *DirectTy,
CharUnits DirectSize,
CharUnits DirectAlign,
CharUnits SlotSize,
bool AllowHigherAlign) {
// Cast the element type to i8* if necessary. Some platforms define
// va_list as a struct containing an i8* instead of just an i8*.
if (VAListAddr.getElementType() != CGF.Int8PtrTy)
VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
llvm::Value *Ptr = CGF.Builder.CreateLoad(VAListAddr, "argp.cur");
// If the CC aligns values higher than the slot size, do so if needed.
Address Addr = Address::invalid();
if (AllowHigherAlign && DirectAlign > SlotSize) {
Addr = Address(emitRoundPointerUpToAlignment(CGF, Ptr, DirectAlign),
CGF.Int8Ty, DirectAlign);
} else {
Addr = Address(Ptr, CGF.Int8Ty, SlotSize);
}
// Advance the pointer past the argument, then store that back.
CharUnits FullDirectSize = DirectSize.alignTo(SlotSize);
Address NextPtr =
CGF.Builder.CreateConstInBoundsByteGEP(Addr, FullDirectSize, "argp.next");
CGF.Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
// If the argument is smaller than a slot, and this is a big-endian
// target, the argument will be right-adjusted in its slot.
if (DirectSize < SlotSize && CGF.CGM.getDataLayout().isBigEndian() &&
!DirectTy->isStructTy()) {
Addr = CGF.Builder.CreateConstInBoundsByteGEP(Addr, SlotSize - DirectSize);
}
Addr = CGF.Builder.CreateElementBitCast(Addr, DirectTy);
return Addr;
}
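// Illustrative note (assumption): with 8-byte slots on a big-endian target,
// a 4-byte integer is right-adjusted within its slot, so the returned
// address is the slot start plus (8 - 4) bytes; struct types skip this
// adjustment.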
/// Emit va_arg for a platform using the common void* representation,
/// where arguments are simply emitted in an array of slots on the stack.
///
/// \param IsIndirect - Values of this type are passed indirectly.
/// \param ValueInfo - The size and alignment of this type, generally
/// computed with getContext().getTypeInfoInChars(ValueTy).
/// \param SlotSizeAndAlign - The size and alignment of a stack slot.
/// Each argument will be allocated to a multiple of this number of
/// slots, and all the slots will be aligned to this value.
/// \param AllowHigherAlign - The slot alignment is not a cap;
/// an argument type with an alignment greater than the slot size
/// will be emitted on a higher-alignment address, potentially
/// leaving one or more empty slots behind as padding.
static Address emitVoidPtrVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType ValueTy, bool IsIndirect,
TypeInfoChars ValueInfo,
CharUnits SlotSizeAndAlign,
bool AllowHigherAlign) {
// The size and alignment of the value that was passed directly.
CharUnits DirectSize, DirectAlign;
if (IsIndirect) {
DirectSize = CGF.getPointerSize();
DirectAlign = CGF.getPointerAlign();
} else {
DirectSize = ValueInfo.Width;
DirectAlign = ValueInfo.Align;
}
// Cast the address we've calculated to the right type.
llvm::Type *DirectTy = CGF.ConvertTypeForMem(ValueTy), *ElementTy = DirectTy;
if (IsIndirect)
DirectTy = DirectTy->getPointerTo(0);
Address Addr =
emitVoidPtrDirectVAArg(CGF, VAListAddr, DirectTy, DirectSize, DirectAlign,
SlotSizeAndAlign, AllowHigherAlign);
if (IsIndirect) {
Addr = Address(CGF.Builder.CreateLoad(Addr), ElementTy, ValueInfo.Align);
}
return Addr;
}
static Address complexTempStructure(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty, CharUnits SlotSize,
CharUnits EltSize, const ComplexType *CTy) {
Address Addr =
emitVoidPtrDirectVAArg(CGF, VAListAddr, CGF.Int8Ty, SlotSize * 2,
SlotSize, SlotSize, /*AllowHigher*/ true);
Address RealAddr = Addr;
Address ImagAddr = RealAddr;
if (CGF.CGM.getDataLayout().isBigEndian()) {
RealAddr =
CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize - EltSize);
ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(ImagAddr,
2 * SlotSize - EltSize);
} else {
ImagAddr = CGF.Builder.CreateConstInBoundsByteGEP(RealAddr, SlotSize);
}
llvm::Type *EltTy = CGF.ConvertTypeForMem(CTy->getElementType());
RealAddr = CGF.Builder.CreateElementBitCast(RealAddr, EltTy);
ImagAddr = CGF.Builder.CreateElementBitCast(ImagAddr, EltTy);
llvm::Value *Real = CGF.Builder.CreateLoad(RealAddr, ".vareal");
llvm::Value *Imag = CGF.Builder.CreateLoad(ImagAddr, ".vaimag");
Address Temp = CGF.CreateMemTemp(Ty, "vacplx");
CGF.EmitStoreOfComplex({Real, Imag}, CGF.MakeAddrLValue(Temp, Ty),
/*init*/ true);
return Temp;
}
static Address emitMergePHI(CodeGenFunction &CGF,
Address Addr1, llvm::BasicBlock *Block1,
Address Addr2, llvm::BasicBlock *Block2,
const llvm::Twine &Name = "") {
assert(Addr1.getType() == Addr2.getType());
llvm::PHINode *PHI = CGF.Builder.CreatePHI(Addr1.getType(), 2, Name);
PHI->addIncoming(Addr1.getPointer(), Block1);
PHI->addIncoming(Addr2.getPointer(), Block2);
CharUnits Align = std::min(Addr1.getAlignment(), Addr2.getAlignment());
return Address(PHI, Addr1.getElementType(), Align);
}
TargetCodeGenInfo::TargetCodeGenInfo(std::unique_ptr<ABIInfo> Info)
: Info(std::move(Info)) {}
TargetCodeGenInfo::~TargetCodeGenInfo() = default;
// If someone can figure out a general rule for this, that would be great.
// It's probably just doomed to be platform-dependent, though.
unsigned TargetCodeGenInfo::getSizeOfUnwindException() const {
// Verified for:
// x86-64 FreeBSD, Linux, Darwin
// x86-32 FreeBSD, Linux, Darwin
// PowerPC Linux, Darwin
// ARM Darwin (*not* EABI)
// AArch64 Linux
return 32;
}
bool TargetCodeGenInfo::isNoProtoCallVariadic(const CallArgList &args,
const FunctionNoProtoType *fnType) const {
// The following conventions are known to require this to be false:
// x86_stdcall
// MIPS
// For everything else, we just prefer false unless we opt out.
return false;
}
void
TargetCodeGenInfo::getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const {
// This assumes the user is passing a library name like "rt" instead of a
// filename like "librt.a/so", and that they don't care whether it's static or
// dynamic.
Opt = "-l";
Opt += Lib;
}
unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
// OpenCL kernels are called via an explicit runtime API with arguments
// set with clSetKernelArg(), not as normal sub-functions.
// Return SPIR_KERNEL by default as the kernel calling convention to
// ensure the fingerprint is fixed in such a way that each OpenCL
// argument gets one matching argument in the produced kernel function
// argument list, which makes a feasible implementation of
// clSetKernelArg() possible even for aggregates. If we used the default
// C calling convention here, clSetKernelArg() might break depending on
// target-specific conventions; different targets might, for example,
// split structs passed by value into multiple function arguments.
return llvm::CallingConv::SPIR_KERNEL;
}
llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const {
return llvm::ConstantPointerNull::get(T);
}
LangAS TargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
assert(!CGM.getLangOpts().OpenCL &&
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
"Address space agnostic languages only");
return D ? D->getType().getAddressSpace() : LangAS::Default;
}
llvm::Value *TargetCodeGenInfo::performAddrSpaceCast(
CodeGen::CodeGenFunction &CGF, llvm::Value *Src, LangAS SrcAddr,
LangAS DestAddr, llvm::Type *DestTy, bool isNonNull) const {
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
if (auto *C = dyn_cast<llvm::Constant>(Src))
return performAddrSpaceCast(CGF.CGM, C, SrcAddr, DestAddr, DestTy);
// Try to preserve the source's name to make IR more readable.
return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Src, DestTy, Src->hasName() ? Src->getName() + ".ascast" : "");
}
llvm::Constant *
TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
LangAS SrcAddr, LangAS DestAddr,
llvm::Type *DestTy) const {
// Since target may map different address spaces in AST to the same address
// space, an address space conversion may end up as a bitcast.
return llvm::ConstantExpr::getPointerCast(Src, DestTy);
}
llvm::SyncScope::ID
TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const {
return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */
}
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
/// isEmptyField - Return true iff the field is "empty", that is, it
/// is an unnamed bit-field or an (array of) empty record(s).
static bool isEmptyField(ASTContext &Context, const FieldDecl *FD,
bool AllowArrays) {
if (FD->isUnnamedBitfield())
return true;
QualType FT = FD->getType();
// Constant arrays of empty records count as empty; strip them off.
// Constant arrays of zero length always count as empty.
bool WasArray = false;
if (AllowArrays)
while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
if (AT->getSize() == 0)
return true;
FT = AT->getElementType();
// The [[no_unique_address]] special case below does not apply to
// arrays of C++ empty records, so we need to remember this fact.
WasArray = true;
}
const RecordType *RT = FT->getAs<RecordType>();
if (!RT)
return false;
// C++ record fields are never empty, at least in the Itanium ABI.
//
// FIXME: We should use a predicate for whether this behavior is true in the
// current ABI.
//
// The exception to the above rule are fields marked with the
// [[no_unique_address]] attribute (since C++20). Those do count as empty
// according to the Itanium ABI. The exception applies only to records,
// not arrays of records, so we must also check whether we stripped off an
// array type above.
if (isa<CXXRecordDecl>(RT->getDecl()) &&
(WasArray || !FD->hasAttr<NoUniqueAddressAttr>()))
return false;
return isEmptyRecord(Context, FT, AllowArrays);
}
/// isEmptyRecord - Return true iff a structure contains only empty
/// fields. Note that a structure with a flexible array member is not
/// considered empty.
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases())
if (!isEmptyRecord(Context, I.getType(), true))
return false;
for (const auto *I : RD->fields())
if (!isEmptyField(Context, I, AllowArrays))
return false;
return true;
}
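// Illustrative example (assumption): in C, 'struct E {};' and a struct whose
// only field is 'struct E e[4];' both count as empty here. In C++, a field
// of empty class type counts as empty only when it is marked
// [[no_unique_address]]; arrays of empty classes never do.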
/// isSingleElementStruct - Determine if a structure is a "single
/// element struct", i.e. it has exactly one non-empty field or
/// exactly one field which is itself a single element
/// struct. Structures with flexible array members are never
/// considered single element structs.
///
/// \return The field declaration for the single non-empty field, if
/// it exists.
static const Type *isSingleElementStruct(QualType T, ASTContext &Context) {
const RecordType *RT = T->getAs<RecordType>();
if (!RT)
return nullptr;
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return nullptr;
const Type *Found = nullptr;
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CXXRD->bases()) {
// Ignore empty records.
if (isEmptyRecord(Context, I.getType(), true))
continue;
// If we already found an element then this isn't a single-element struct.
if (Found)
return nullptr;
// If this is non-empty and not a single element struct, the composite
// cannot be a single element struct.
Found = isSingleElementStruct(I.getType(), Context);
if (!Found)
return nullptr;
}
}
// Check for single element.
for (const auto *FD : RD->fields()) {
QualType FT = FD->getType();
// Ignore empty fields.
if (isEmptyField(Context, FD, true))
continue;
// If we already found an element then this isn't a single-element
// struct.
if (Found)
return nullptr;
// Treat single element arrays as the element.
while (const ConstantArrayType *AT = Context.getAsConstantArrayType(FT)) {
if (AT->getSize().getZExtValue() != 1)
break;
FT = AT->getElementType();
}
if (!isAggregateTypeForABI(FT)) {
Found = FT.getTypePtr();
} else {
Found = isSingleElementStruct(FT, Context);
if (!Found)
return nullptr;
}
}
// We don't consider a struct a single-element struct if it has
// padding beyond the element type.
if (Found && Context.getTypeSize(Found) != Context.getTypeSize(T))
return nullptr;
return Found;
}
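// Illustrative example (assumption): 'struct S { struct { float f; } inner; }'
// is a single-element struct with element type 'float', while
// 'struct P { float f; float g; }' is not (two non-empty fields), and a
// struct with tail padding beyond its only field fails the final size check.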
namespace {
Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty,
const ABIArgInfo &AI) {
// This default implementation defers to the llvm backend's va_arg
// instruction. It can handle only passing arguments directly
// (typically only handled in the backend for primitive types), or
// aggregates passed indirectly by pointer (NOTE: if the "byval"
// flag has ABI impact in the callee, this implementation cannot
// work.)
// Only a few cases are covered here at the moment -- those needed
// by the default ABI.
llvm::Value *Val;
if (AI.isIndirect()) {
assert(!AI.getPaddingType() &&
"Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
assert(
!AI.getIndirectRealign() &&
"Unexpected IndirectRealign seen in arginfo in generic VAArg emitter!");
auto TyInfo = CGF.getContext().getTypeInfoInChars(Ty);
CharUnits TyAlignForABI = TyInfo.Align;
llvm::Type *ElementTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *BaseTy = llvm::PointerType::getUnqual(ElementTy);
llvm::Value *Addr =
CGF.Builder.CreateVAArg(VAListAddr.getPointer(), BaseTy);
return Address(Addr, ElementTy, TyAlignForABI);
} else {
assert((AI.isDirect() || AI.isExtend()) &&
"Unexpected ArgInfo Kind in generic VAArg emitter!");
assert(!AI.getInReg() &&
"Unexpected InReg seen in arginfo in generic VAArg emitter!");
assert(!AI.getPaddingType() &&
"Unexpected PaddingType seen in arginfo in generic VAArg emitter!");
assert(!AI.getDirectOffset() &&
"Unexpected DirectOffset seen in arginfo in generic VAArg emitter!");
assert(!AI.getCoerceToType() &&
"Unexpected CoerceToType seen in arginfo in generic VAArg emitter!");
Address Temp = CGF.CreateMemTemp(Ty, "varet");
Val = CGF.Builder.CreateVAArg(VAListAddr.getPointer(),
CGF.ConvertTypeForMem(Ty));
CGF.Builder.CreateStore(Val, Temp);
return Temp;
}
}
/// DefaultABIInfo - The default implementation for ABI specific
/// details. This implementation provides information which results in
/// self-consistent and sensible LLVM IR generation, but does not
/// conform to any particular ABI.
class DefaultABIInfo : public ABIInfo {
public:
DefaultABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
}
};
class DefaultTargetCodeGenInfo : public TargetCodeGenInfo {
public:
DefaultTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
};
ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
return getNaturalAlignIndirect(Ty);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
ASTContext &Context = getContext();
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() >
Context.getTypeSize(Context.getTargetInfo().hasInt128Type()
? Context.Int128Ty
: Context.LongLongTy))
return getNaturalAlignIndirect(Ty);
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() >
getContext().getTypeSize(getContext().getTargetInfo().hasInt128Type()
? getContext().Int128Ty
: getContext().LongLongTy))
return getNaturalAlignIndirect(RetTy);
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
//===----------------------------------------------------------------------===//
// WebAssembly ABI Implementation
//
// This is a very simple ABI that relies a lot on DefaultABIInfo.
//===----------------------------------------------------------------------===//
class WebAssemblyABIInfo final : public SwiftABIInfo {
public:
enum ABIKind {
MVP = 0,
ExperimentalMV = 1,
};
private:
DefaultABIInfo defaultInfo;
ABIKind Kind;
public:
explicit WebAssemblyABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
: SwiftABIInfo(CGT), defaultInfo(CGT), Kind(Kind) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
// non-virtual, but computeInfo and EmitVAArg are virtual, so we
// override them.
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return false;
}
};
class WebAssemblyTargetCodeGenInfo final : public TargetCodeGenInfo {
public:
explicit WebAssemblyTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
WebAssemblyABIInfo::ABIKind K)
: TargetCodeGenInfo(std::make_unique<WebAssemblyABIInfo>(CGT, K)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const auto *Attr = FD->getAttr<WebAssemblyImportModuleAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B(GV->getContext());
B.addAttribute("wasm-import-module", Attr->getImportModule());
Fn->addFnAttrs(B);
}
if (const auto *Attr = FD->getAttr<WebAssemblyImportNameAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B(GV->getContext());
B.addAttribute("wasm-import-name", Attr->getImportName());
Fn->addFnAttrs(B);
}
if (const auto *Attr = FD->getAttr<WebAssemblyExportNameAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
llvm::AttrBuilder B(GV->getContext());
B.addAttribute("wasm-export-name", Attr->getExportName());
Fn->addFnAttrs(B);
}
}
if (auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
llvm::Function *Fn = cast<llvm::Function>(GV);
if (!FD->doesThisDeclarationHaveABody() && !FD->hasPrototype())
Fn->addFnAttr("no-prototype");
}
}
};
/// Classify argument of given type \p Ty.
ABIArgInfo WebAssemblyABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just pass a regular value. TODO: We
// could do reasonable-size multiple-element structs too, using getExpand(),
// though watch out for things like bitfields.
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// For the experimental multivalue ABI, fully expand all other aggregates
if (Kind == ABIKind::ExperimentalMV) {
const RecordType *RT = Ty->getAs<RecordType>();
assert(RT);
bool HasBitField = false;
for (auto *Field : RT->getDecl()->fields()) {
if (Field->isBitField()) {
HasBitField = true;
break;
}
}
if (!HasBitField)
return ABIArgInfo::getExpand();
}
}
// Otherwise just do the default thing.
return defaultInfo.classifyArgumentType(Ty);
}
ABIArgInfo WebAssemblyABIInfo::classifyReturnType(QualType RetTy) const {
if (isAggregateTypeForABI(RetTy)) {
// Records with non-trivial destructors/copy-constructors should not be
// returned by value.
if (!getRecordArgABI(RetTy, getCXXABI())) {
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just return a regular value. TODO: We
// could do reasonable-size multiple-element structs too, using
// ABIArgInfo::getDirect().
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// For the experimental multivalue ABI, return all other aggregates
if (Kind == ABIKind::ExperimentalMV)
return ABIArgInfo::getDirect();
}
}
// Otherwise just do the default thing.
return defaultInfo.classifyReturnType(RetTy);
}
Address WebAssemblyABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
bool IsIndirect = isAggregateTypeForABI(Ty) &&
!isEmptyRecord(getContext(), Ty, true) &&
!isSingleElementStruct(Ty, getContext());
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(4),
/*AllowHigherAlign=*/true);
}
//===----------------------------------------------------------------------===//
// le32/PNaCl bitcode ABI Implementation
//
// This is a simplified version of the x86_32 ABI. Arguments and return values
// are always passed on the stack.
//===----------------------------------------------------------------------===//
class PNaClABIInfo : public ABIInfo {
public:
PNaClABIInfo(CodeGen::CodeGenTypes &CGT) : ABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF,
Address VAListAddr, QualType Ty) const override;
};
class PNaClTargetCodeGenInfo : public TargetCodeGenInfo {
public:
PNaClTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<PNaClABIInfo>(CGT)) {}
};
void PNaClABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address PNaClABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// The PNaCL ABI is a bit odd, in that varargs don't use normal
// function classification. Structs get passed directly for varargs
// functions, through a rewriting transform in
// pnacl-llvm/lib/Transforms/NaCl/ExpandVarArgs.cpp, which allows
// this target to actually support va_arg instructions with an
// aggregate type, unlike other targets.
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
}
/// Classify argument of given type \p Ty.
ABIArgInfo PNaClABIInfo::classifyArgumentType(QualType Ty) const {
if (isAggregateTypeForABI(Ty)) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
return getNaturalAlignIndirect(Ty);
} else if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
// Treat an enum type as its underlying type.
Ty = EnumTy->getDecl()->getIntegerType();
} else if (Ty->isFloatingType()) {
// Floating-point types don't go inreg.
return ABIArgInfo::getDirect();
} else if (const auto *EIT = Ty->getAs<BitIntType>()) {
// Treat bit-precise integers as integers if <= 64, otherwise pass
// indirectly.
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(Ty);
return ABIArgInfo::getDirect();
}
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
ABIArgInfo PNaClABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
// In the PNaCl ABI we always return records/structures on the stack.
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
// Treat bit-precise integers as integers if <= 64, otherwise pass indirectly.
if (const auto *EIT = RetTy->getAs<BitIntType>()) {
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(RetTy);
return ABIArgInfo::getDirect();
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
/// IsX86_MMXType - Return true if this is an MMX type.
bool IsX86_MMXType(llvm::Type *IRType) {
// Return true if the type is an MMX type <2 x i32>, <4 x i16>, or <8 x i8>.
return IRType->isVectorTy() && IRType->getPrimitiveSizeInBits() == 64 &&
cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy() &&
IRType->getScalarSizeInBits() != 64;
}
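// For illustration: <2 x i32>, <4 x i16>, and <8 x i8> all satisfy this
// predicate, while <1 x i64> does not, because its scalar size is 64 bits,
// and <2 x float> does not, because its elements are not integers.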
static llvm::Type* X86AdjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) {
bool IsMMXCons = llvm::StringSwitch<bool>(Constraint)
.Cases("y", "&y", "^Ym", true)
.Default(false);
if (IsMMXCons && Ty->isVectorTy()) {
if (cast<llvm::VectorType>(Ty)->getPrimitiveSizeInBits().getFixedSize() !=
64) {
// Invalid MMX constraint
return nullptr;
}
return llvm::Type::getX86_MMXTy(CGF.getLLVMContext());
}
// No operation needed
return Ty;
}
/// Returns true if this type can be passed in SSE registers with the
/// X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorTypeForVectorCall(ASTContext &Context, QualType Ty) {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->isFloatingPoint() && BT->getKind() != BuiltinType::Half) {
if (BT->getKind() == BuiltinType::LongDouble) {
if (&Context.getTargetInfo().getLongDoubleFormat() ==
&llvm::APFloat::x87DoubleExtended())
return false;
}
return true;
}
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
// vectorcall can pass XMM, YMM, and ZMM vectors. We don't pass SSE1 MMX
// registers specially.
unsigned VecSize = Context.getTypeSize(VT);
if (VecSize == 128 || VecSize == 256 || VecSize == 512)
return true;
}
return false;
}
/// Returns true if this aggregate is small enough to be passed in SSE registers
/// in the X86_VectorCall calling convention. Shared between x86_32 and x86_64.
static bool isX86VectorCallAggregateSmallEnough(uint64_t NumMembers) {
return NumMembers <= 4;
}
/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
static ABIArgInfo getDirectX86Hva(llvm::Type* T = nullptr) {
auto AI = ABIArgInfo::getDirect(T);
AI.setInReg(true);
AI.setCanBeFlattened(false);
return AI;
}
//===----------------------------------------------------------------------===//
// X86-32 ABI Implementation
//===----------------------------------------------------------------------===//
/// Similar to llvm::CCState, but for Clang.
struct CCState {
CCState(CGFunctionInfo &FI)
: IsPreassigned(FI.arg_size()), CC(FI.getCallingConvention()) {}
llvm::SmallBitVector IsPreassigned;
unsigned CC = CallingConv::CC_C;
unsigned FreeRegs = 0;
unsigned FreeSSERegs = 0;
};
/// X86_32ABIInfo - The X86-32 ABI information.
class X86_32ABIInfo : public SwiftABIInfo {
enum Class {
Integer,
Float
};
static const unsigned MinABIStackAlignInBytes = 4;
bool IsDarwinVectorABI;
bool IsRetSmallStructInRegABI;
bool IsWin32StructABI;
bool IsSoftFloatABI;
bool IsMCUABI;
bool IsLinuxABI;
unsigned DefaultNumRegisterParameters;
static bool isRegisterSize(unsigned Size) {
return (Size == 8 || Size == 16 || Size == 32 || Size == 64);
}
bool isHomogeneousAggregateBaseType(QualType Ty) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorTypeForVectorCall(getContext(), Ty);
}
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t NumMembers) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorCallAggregateSmallEnough(NumMembers);
}
bool shouldReturnTypeInRegister(QualType Ty, ASTContext &Context) const;
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
/// such that the argument will be passed in memory.
ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
/// Return the alignment to use for the given type on the stack.
unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
Class classify(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
/// Updates the number of available free registers; returns
/// true if any registers were allocated.
bool updateFreeRegs(QualType Ty, CCState &State) const;
bool shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg,
bool &NeedsPadding) const;
bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const;
bool canExpandIndirectArgument(QualType Ty) const;
/// Rewrite the function info so that all memory arguments use
/// inalloca.
void rewriteWithInAlloca(CGFunctionInfo &FI) const;
void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const;
void runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const;
public:
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
: SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
IsRetSmallStructInRegABI(RetSmallStructInRegABI),
IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
IsLinuxABI(CGT.getTarget().getTriple().isOSLinux() ||
CGT.getTarget().getTriple().isOSCygMing()),
DefaultNumRegisterParameters(NumRegisterParameters) {}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
// LLVM's x86-32 lowering currently only assigns up to three
// integer registers and three fp registers. Oddly, it'll use up to
// four vector registers for vectors, but those can overlap with the
// scalar registers.
return occupiesMoreThan(CGT, scalars, /*total*/ 3);
}
bool isSwiftErrorInRegister() const override {
// x86-32 lowering does not support passing swifterror in a register.
return false;
}
};
class X86_32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI,
bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters, bool SoftFloatABI)
: TargetCodeGenInfo(std::make_unique<X86_32ABIInfo>(
CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI,
NumRegisterParameters, SoftFloatABI)) {}
static bool isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts);
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
// Darwin uses different dwarf register numbers for EH.
if (CGM.getTarget().getTriple().isOSDarwin()) return 5;
return 4;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) const override {
return X86AdjustInlineAsmType(CGF, Constraint, Ty);
}
void addReturnRegisterOutputs(CodeGenFunction &CGF, LValue ReturnValue,
std::string &Constraints,
std::vector<llvm::Type *> &ResultRegTypes,
std::vector<llvm::Type *> &ResultTruncRegTypes,
std::vector<LValue> &ResultRegDests,
std::string &AsmString,
unsigned NumOutputs) const override;
llvm::Constant *
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
('v' << 16) |
('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "movl\t%ebp, %ebp"
"\t\t// marker for objc_retainAutoreleaseReturnValue";
}
};
}
/// Rewrite input constraint references after adding some output constraints.
/// In the case where there is one output and one input and we add one output,
/// we need to replace all operand references greater than or equal to 1:
/// mov $0, $1
/// mov eax, $1
/// The result will be:
/// mov $0, $2
/// mov eax, $2
static void rewriteInputConstraintReferences(unsigned FirstIn,
unsigned NumNewOuts,
std::string &AsmString) {
std::string Buf;
llvm::raw_string_ostream OS(Buf);
size_t Pos = 0;
while (Pos < AsmString.size()) {
size_t DollarStart = AsmString.find('$', Pos);
if (DollarStart == std::string::npos)
DollarStart = AsmString.size();
size_t DollarEnd = AsmString.find_first_not_of('$', DollarStart);
if (DollarEnd == std::string::npos)
DollarEnd = AsmString.size();
OS << StringRef(&AsmString[Pos], DollarEnd - Pos);
Pos = DollarEnd;
size_t NumDollars = DollarEnd - DollarStart;
if (NumDollars % 2 != 0 && Pos < AsmString.size()) {
// We have an operand reference.
size_t DigitStart = Pos;
if (AsmString[DigitStart] == '{') {
OS << '{';
++DigitStart;
}
size_t DigitEnd = AsmString.find_first_not_of("0123456789", DigitStart);
if (DigitEnd == std::string::npos)
DigitEnd = AsmString.size();
StringRef OperandStr(&AsmString[DigitStart], DigitEnd - DigitStart);
unsigned OperandIndex;
if (!OperandStr.getAsInteger(10, OperandIndex)) {
if (OperandIndex >= FirstIn)
OperandIndex += NumNewOuts;
OS << OperandIndex;
} else {
OS << OperandStr;
}
Pos = DigitEnd;
}
}
AsmString = std::move(OS.str());
}
/// Add output constraints for EAX:EDX because they are return registers.
void X86_32TargetCodeGenInfo::addReturnRegisterOutputs(
CodeGenFunction &CGF, LValue ReturnSlot, std::string &Constraints,
std::vector<llvm::Type *> &ResultRegTypes,
std::vector<llvm::Type *> &ResultTruncRegTypes,
std::vector<LValue> &ResultRegDests, std::string &AsmString,
unsigned NumOutputs) const {
uint64_t RetWidth = CGF.getContext().getTypeSize(ReturnSlot.getType());
// Use the EAX constraint if the width is 32 or smaller and EAX:EDX if it is
// larger.
if (!Constraints.empty())
Constraints += ',';
if (RetWidth <= 32) {
Constraints += "={eax}";
ResultRegTypes.push_back(CGF.Int32Ty);
} else {
// Use the 'A' constraint for EAX:EDX.
Constraints += "=A";
ResultRegTypes.push_back(CGF.Int64Ty);
}
// Truncate EAX or EAX:EDX to an integer of the appropriate size.
llvm::Type *CoerceTy = llvm::IntegerType::get(CGF.getLLVMContext(), RetWidth);
ResultTruncRegTypes.push_back(CoerceTy);
// Coerce the integer by bitcasting the return slot pointer.
ReturnSlot.setAddress(
CGF.Builder.CreateElementBitCast(ReturnSlot.getAddress(CGF), CoerceTy));
ResultRegDests.push_back(ReturnSlot);
rewriteInputConstraintReferences(NumOutputs, 1, AsmString);
}
/// shouldReturnTypeInRegister - Determine if the given type should be
/// returned in a register (for the Darwin and MCU ABI).
bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty,
ASTContext &Context) const {
uint64_t Size = Context.getTypeSize(Ty);
// For i386, the type must be register sized.
// For the MCU ABI, it only needs to be <= 8 bytes.
if ((IsMCUABI && Size > 64) || (!IsMCUABI && !isRegisterSize(Size)))
return false;
if (Ty->isVectorType()) {
// 64- and 128- bit vectors inside structures are not returned in
// registers.
if (Size == 64 || Size == 128)
return false;
return true;
}
// If this is a builtin, pointer, enum, complex type, member pointer, or
// member function pointer it is ok.
if (Ty->getAs<BuiltinType>() || Ty->hasPointerRepresentation() ||
Ty->isAnyComplexType() || Ty->isEnumeralType() ||
Ty->isBlockPointerType() || Ty->isMemberPointerType())
return true;
// Arrays are treated like records.
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty))
return shouldReturnTypeInRegister(AT->getElementType(), Context);
// Otherwise, it must be a record type.
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT) return false;
// FIXME: Traverse bases here too.
// Structure types are passed in register if all fields would be
// passed in a register.
for (const auto *FD : RT->getDecl()->fields()) {
// Empty fields are ignored.
if (isEmptyField(Context, FD, true))
continue;
// Check fields recursively.
if (!shouldReturnTypeInRegister(FD->getType(), Context))
return false;
}
return true;
}
static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) {
// Treat complex types as the element type.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
// Check for a type which we know has a simple scalar argument-passing
// convention without any padding. (We're specifically looking for 32
// and 64-bit integer and integer-equivalents, float, and double.)
if (!Ty->getAs<BuiltinType>() && !Ty->hasPointerRepresentation() &&
!Ty->isEnumeralType() && !Ty->isBlockPointerType())
return false;
uint64_t Size = Context.getTypeSize(Ty);
return Size == 32 || Size == 64;
}
static bool addFieldSizes(ASTContext &Context, const RecordDecl *RD,
uint64_t &Size) {
for (const auto *FD : RD->fields()) {
// Scalar arguments on the stack get 4 byte alignment on x86. If the
// argument is smaller than 32-bits, expanding the struct will create
// alignment padding.
if (!is32Or64BitBasicType(FD->getType(), Context))
return false;
// FIXME: Reject bit-fields wholesale; there are two problems: we don't know
// how to expand them yet, and the predicate for telling if a bitfield still
// counts as "basic" is more complicated than what we were doing previously.
if (FD->isBitField())
return false;
Size += Context.getTypeSize(FD->getType());
}
return true;
}
static bool addBaseAndFieldSizes(ASTContext &Context, const CXXRecordDecl *RD,
uint64_t &Size) {
// Don't do this if there are any non-empty bases.
for (const CXXBaseSpecifier &Base : RD->bases()) {
if (!addBaseAndFieldSizes(Context, Base.getType()->getAsCXXRecordDecl(),
Size))
return false;
}
if (!addFieldSizes(Context, RD, Size))
return false;
return true;
}
/// Test whether an argument type which is to be passed indirectly (on the
/// stack) would have the equivalent layout if it was expanded into separate
/// arguments. If so, we prefer to do the latter to avoid inhibiting
/// optimizations.
bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const {
// We can only expand structure types.
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
uint64_t Size = 0;
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
if (!IsWin32StructABI) {
// On non-Windows, we have to conservatively match our old bitcode
// prototypes in order to be ABI-compatible at the bitcode level.
if (!CXXRD->isCLike())
return false;
} else {
// Don't do this for dynamic classes.
if (CXXRD->isDynamicClass())
return false;
}
if (!addBaseAndFieldSizes(getContext(), CXXRD, Size))
return false;
} else {
if (!addFieldSizes(getContext(), RD, Size))
return false;
}
// We can do this if there was no alignment padding.
return Size == getContext().getTypeSize(Ty);
}
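// Illustrative example (assumption, Win32 layout): 'struct { int a; int b; }'
// can be expanded (field sizes sum to its 64-bit type size), while
// 'struct { int a; double b; }' cannot, since alignment padding makes the
// type 128 bits even though the fields sum to only 96.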
ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const {
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
if (State.FreeRegs) {
--State.FreeRegs;
if (!IsMCUABI)
return getNaturalAlignIndirectInReg(RetTy);
}
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
}
ABIArgInfo X86_32ABIInfo::classifyReturnType(QualType RetTy,
CCState &State) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
uint64_t NumElts = 0;
if ((State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) &&
isHomogeneousAggregate(RetTy, Base, NumElts)) {
// The LLVM struct type for such an aggregate should lower properly.
return ABIArgInfo::getDirect();
}
if (const VectorType *VT = RetTy->getAs<VectorType>()) {
// On Darwin, some vectors are returned in registers.
if (IsDarwinVectorABI) {
uint64_t Size = getContext().getTypeSize(RetTy);
// 128-bit vectors are a special case; they are returned in
// registers and we need to make sure to pick a type the LLVM
// backend will like.
if (Size == 128)
return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
llvm::Type::getInt64Ty(getVMContext()), 2));
// Always return in register if it fits in a general purpose
// register, or if it is 64 bits and has a single element.
if ((Size == 8 || Size == 16 || Size == 32) ||
(Size == 64 && VT->getNumElements() == 1))
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
Size));
return getIndirectReturnResult(RetTy, State);
}
return ABIArgInfo::getDirect();
}
if (isAggregateTypeForABI(RetTy)) {
if (const RecordType *RT = RetTy->getAs<RecordType>()) {
// Structures with flexible arrays are always indirect.
if (RT->getDecl()->hasFlexibleArrayMember())
return getIndirectReturnResult(RetTy, State);
}
// If specified, structs and unions are always indirect.
if (!IsRetSmallStructInRegABI && !RetTy->isAnyComplexType())
return getIndirectReturnResult(RetTy, State);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Return complex of _Float16 as <2 x half> so the backend will use xmm0.
if (const ComplexType *CT = RetTy->getAs<ComplexType>()) {
QualType ET = getContext().getCanonicalType(CT->getElementType());
if (ET->isFloat16Type())
return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
llvm::Type::getHalfTy(getVMContext()), 2));
}
// Small structures which are register sized are generally returned
// in a register.
if (shouldReturnTypeInRegister(RetTy, getContext())) {
uint64_t Size = getContext().getTypeSize(RetTy);
// As a special-case, if the struct is a "single-element" struct, and
// the field is of type "float" or "double", return it in a
// floating-point register. (MSVC does not apply this special case.)
// We apply a similar transformation for pointer types to improve the
// quality of the generated IR.
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
if ((!IsWin32StructABI && SeltTy->isRealFloatingType())
|| SeltTy->hasPointerRepresentation())
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
// FIXME: We should be able to narrow this integer in cases with dead
// padding.
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),Size));
}
return getIndirectReturnResult(RetTy, State);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getIndirectReturnResult(RetTy, State);
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
static bool isSIMDVectorType(ASTContext &Context, QualType Ty) {
return Ty->getAs<VectorType>() && Context.getTypeSize(Ty) == 128;
}
static bool isRecordWithSIMDVectorType(ASTContext &Context, QualType Ty) {
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases())
if (!isRecordWithSIMDVectorType(Context, I.getType()))
return false;
for (const auto *i : RD->fields()) {
QualType FT = i->getType();
if (isSIMDVectorType(Context, FT))
return true;
if (isRecordWithSIMDVectorType(Context, FT))
return true;
}
return false;
}
unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
unsigned Align) const {
// Otherwise, if the alignment is less than or equal to the minimum ABI
// alignment, just use the default; the backend will handle this.
if (Align <= MinABIStackAlignInBytes)
return 0; // Use default alignment.
if (IsLinuxABI) {
// Exclude other System V OSes (e.g. Darwin, PS4 and FreeBSD) since we don't
// want to spend any effort dealing with the ramifications of ABI breaks.
//
// If the vector type is __m128/__m256/__m512, return the default alignment.
if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64))
return Align;
}
// On non-Darwin, the stack type alignment is always 4.
if (!IsDarwinVectorABI) {
// Set explicit alignment, since we may need to realign the top.
return MinABIStackAlignInBytes;
}
// Otherwise, if the type contains an SSE vector type, the alignment is 16.
if (Align >= 16 && (isSIMDVectorType(getContext(), Ty) ||
isRecordWithSIMDVectorType(getContext(), Ty)))
return 16;
return MinABIStackAlignInBytes;
}
ABIArgInfo X86_32ABIInfo::getIndirectResult(QualType Ty, bool ByVal,
CCState &State) const {
if (!ByVal) {
if (State.FreeRegs) {
--State.FreeRegs; // Non-byval indirects just use one pointer.
if (!IsMCUABI)
return getNaturalAlignIndirectInReg(Ty);
}
return getNaturalAlignIndirect(Ty, false);
}
// Compute the byval alignment.
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
if (StackAlign == 0)
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);
// If the stack alignment is less than the type alignment, realign the
// argument.
bool Realign = TypeAlign > StackAlign;
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(StackAlign),
/*ByVal=*/true, Realign);
}
X86_32ABIInfo::Class X86_32ABIInfo::classify(QualType Ty) const {
const Type *T = isSingleElementStruct(Ty, getContext());
if (!T)
T = Ty.getTypePtr();
if (const BuiltinType *BT = T->getAs<BuiltinType>()) {
BuiltinType::Kind K = BT->getKind();
if (K == BuiltinType::Float || K == BuiltinType::Double)
return Float;
}
return Integer;
}
bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const {
if (!IsSoftFloatABI) {
Class C = classify(Ty);
if (C == Float)
return false;
}
unsigned Size = getContext().getTypeSize(Ty);
unsigned SizeInRegs = (Size + 31) / 32;
if (SizeInRegs == 0)
return false;
if (!IsMCUABI) {
if (SizeInRegs > State.FreeRegs) {
State.FreeRegs = 0;
return false;
}
} else {
// The MCU psABI allows passing parameters in-reg even if there are
// earlier parameters that are passed on the stack. Also,
// it does not allow passing >8-byte structs in-register,
// even if there are 3 free registers available.
if (SizeInRegs > State.FreeRegs || SizeInRegs > 2)
return false;
}
State.FreeRegs -= SizeInRegs;
return true;
}
bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State,
bool &InReg,
bool &NeedsPadding) const {
// On Windows, aggregates other than HFAs are never passed in registers, and
// they do not consume register slots. Homogeneous floating-point aggregates
// (HFAs) have already been dealt with at this point.
if (IsWin32StructABI && isAggregateTypeForABI(Ty))
return false;
NeedsPadding = false;
InReg = !IsMCUABI;
if (!updateFreeRegs(Ty, State))
return false;
if (IsMCUABI)
return true;
if (State.CC == llvm::CallingConv::X86_FastCall ||
State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) {
if (getContext().getTypeSize(Ty) <= 32 && State.FreeRegs)
NeedsPadding = true;
return false;
}
return true;
}
bool X86_32ABIInfo::shouldPrimitiveUseInReg(QualType Ty, CCState &State) const {
if (!updateFreeRegs(Ty, State))
return false;
if (IsMCUABI)
return false;
if (State.CC == llvm::CallingConv::X86_FastCall ||
State.CC == llvm::CallingConv::X86_VectorCall ||
State.CC == llvm::CallingConv::X86_RegCall) {
if (getContext().getTypeSize(Ty) > 32)
return false;
return (Ty->isIntegralOrEnumerationType() || Ty->isPointerType() ||
Ty->isReferenceType());
}
return true;
}
void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) const {
// Vectorcall on x86 works subtly differently than on x64, so the format is
// a bit different than the x64 version. First, all vector types (not HVAs)
// are assigned, with the first 6 ending up in the [XYZ]MM0-5 registers.
// This differs from the x64 implementation, where the first 6 arguments by
// index get registers.
// In the second pass over the arguments, HVAs are passed in the remaining
// vector registers if possible, or indirectly by address. The address will be
// passed in ECX/EDX if available. Any other arguments are passed according to
// the usual fastcall rules.
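// Illustrative example (HVA2 stands for a hypothetical two-element HVA
// type): given
//   void __vectorcall f(int i, __m128 a, HVA2 h, __m128 b);
// this first pass assigns 'a' and 'b' to XMM registers; 'i' and the HVA 'h'
// are left for the second pass.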
MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
for (int I = 0, E = Args.size(); I < E; ++I) {
const Type *Base = nullptr;
uint64_t NumElts = 0;
const QualType &Ty = Args[I].type;
if ((Ty->isVectorType() || Ty->isBuiltinType()) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (State.FreeSSERegs >= NumElts) {
State.FreeSSERegs -= NumElts;
Args[I].info = ABIArgInfo::getDirectInReg();
State.IsPreassigned.set(I);
}
}
}
}
ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// FIXME: Set alignment on indirect arguments.
bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
Ty = useFirstFieldIfTransparentUnion(Ty);
TypeInfo TI = getContext().getTypeInfo(Ty);
// Check with the C++ ABI first.
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect) {
return getIndirectResult(Ty, false, State);
} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
// The field index doesn't matter, we'll fix it up later.
return ABIArgInfo::getInAlloca(/*FieldIndex=*/0);
}
}
// Regcall uses the concept of a homogeneous vector aggregate, similar
// to other targets.
const Type *Base = nullptr;
uint64_t NumElts = 0;
if ((IsRegCall || IsVectorCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (State.FreeSSERegs >= NumElts) {
State.FreeSSERegs -= NumElts;
// Vectorcall passes HVAs directly and does not flatten them, but regcall
// does.
if (IsVectorCall)
return getDirectX86Hva();
if (Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
return ABIArgInfo::getExpand();
}
return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (isAggregateTypeForABI(Ty)) {
// Structures with flexible arrays are always indirect.
// FIXME: This should not be byval!
if (RT && RT->getDecl()->hasFlexibleArrayMember())
return getIndirectResult(Ty, true, State);
// Ignore empty structs/unions on non-Windows.
if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
bool NeedsPadding = false;
bool InReg;
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
unsigned SizeInRegs = (TI.Width + 31) / 32;
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
if (InReg)
return ABIArgInfo::getDirectInReg(Result);
else
return ABIArgInfo::getDirect(Result);
}
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
// Pass over-aligned aggregates on Windows indirectly. This behavior was
// added in MSVC 2015.
if (IsWin32StructABI && TI.isAlignRequired() && TI.Align > 32)
return getIndirectResult(Ty, /*ByVal=*/false, State);
// Expand small (<= 128-bit) record types when we know that the stack layout
// of those arguments will match the struct. This is important because the
// LLVM backend isn't smart enough to remove byval, which inhibits many
// optimizations.
// Don't do this for the MCU if there are still free integer registers
// (see X86_64 ABI for full explanation).
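// e.g. (illustrative): struct { int a, b; } is expanded into two separate
// i32 arguments here instead of being passed byval.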
if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
canExpandIndirectArgument(Ty))
return ABIArgInfo::getExpandWithPadding(
IsFastCall || IsVectorCall || IsRegCall, PaddingType);
return getIndirectResult(Ty, true, State);
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
// On Windows, vectors are passed directly if registers are available, or
// indirectly if not. This avoids the need to align argument memory. Pass
// user-defined vector types larger than 512 bits indirectly for simplicity.
if (IsWin32StructABI) {
if (TI.Width <= 512 && State.FreeSSERegs > 0) {
--State.FreeSSERegs;
return ABIArgInfo::getDirectInReg();
}
return getIndirectResult(Ty, /*ByVal=*/false, State);
}
// On Darwin, some vectors are passed in memory, we handle this by passing
// it as an i8/i16/i32/i64.
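// e.g. (illustrative): <2 x char> is passed as i16 and <1 x long long> as
// i64 under this rule.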
if (IsDarwinVectorABI) {
if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
(TI.Width == 64 && VT->getNumElements() == 1))
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), TI.Width));
}
if (IsX86_MMXType(CGT.ConvertType(Ty)))
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
return ABIArgInfo::getDirect();
}
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
bool InReg = shouldPrimitiveUseInReg(Ty, State);
if (isPromotableIntegerTypeForABI(Ty)) {
if (InReg)
return ABIArgInfo::getExtendInReg(Ty);
return ABIArgInfo::getExtend(Ty);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if (EIT->getNumBits() <= 64) {
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
}
return getIndirectResult(Ty, /*ByVal=*/false, State);
}
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
}
void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI);
if (IsMCUABI)
State.FreeRegs = 3;
else if (State.CC == llvm::CallingConv::X86_FastCall) {
State.FreeRegs = 2;
State.FreeSSERegs = 3;
} else if (State.CC == llvm::CallingConv::X86_VectorCall) {
State.FreeRegs = 2;
State.FreeSSERegs = 6;
} else if (FI.getHasRegParm())
State.FreeRegs = FI.getRegParm();
else if (State.CC == llvm::CallingConv::X86_RegCall) {
State.FreeRegs = 5;
State.FreeSSERegs = 8;
} else if (IsWin32StructABI) {
// Since MSVC 2015, the first three SSE vectors have been passed in
// registers. The rest are passed indirectly.
State.FreeRegs = DefaultNumRegisterParameters;
State.FreeSSERegs = 3;
} else
State.FreeRegs = DefaultNumRegisterParameters;
if (!::classifyReturnType(getCXXABI(), FI, *this)) {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State);
} else if (FI.getReturnInfo().isIndirect()) {
// The C++ ABI is not aware of register usage, so we have to check if the
// return value was sret and put it in a register ourselves if appropriate.
if (State.FreeRegs) {
--State.FreeRegs; // The sret parameter consumes a register.
if (!IsMCUABI)
FI.getReturnInfo().setInReg(true);
}
}
// The chain argument effectively gives us another free register.
if (FI.isChainCall())
++State.FreeRegs;
// For vectorcall, do a first pass over the arguments, assigning FP and vector
// arguments to XMM registers as available.
if (State.CC == llvm::CallingConv::X86_VectorCall)
runVectorCallFirstPass(FI, State);
bool UsedInAlloca = false;
MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
for (int I = 0, E = Args.size(); I < E; ++I) {
// Skip arguments that have already been assigned.
if (State.IsPreassigned.test(I))
continue;
Args[I].info = classifyArgumentType(Args[I].type, State);
UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
}
// If we needed to use inalloca for any argument, do a second pass and rewrite
// all the memory arguments to use inalloca.
if (UsedInAlloca)
rewriteWithInAlloca(FI);
}
void
X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const {
// Arguments are always 4-byte-aligned.
CharUnits WordSize = CharUnits::fromQuantity(4);
assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
// sret pointers and indirect things will require an extra pointer
// indirection, unless they are byval. Most things are byval, and will not
// require this indirection.
bool IsIndirect = false;
if (Info.isIndirect() && !Info.getIndirectByVal())
IsIndirect = true;
Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
if (IsIndirect)
LLTy = LLTy->getPointerTo(0);
FrameFields.push_back(LLTy);
StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
// Insert padding bytes to respect alignment.
CharUnits FieldEnd = StackOffset;
StackOffset = FieldEnd.alignTo(WordSize);
if (StackOffset != FieldEnd) {
CharUnits NumBytes = StackOffset - FieldEnd;
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
Ty = llvm::ArrayType::get(Ty, NumBytes.getQuantity());
FrameFields.push_back(Ty);
}
}
static bool isArgInAlloca(const ABIArgInfo &Info) {
// Leave ignored and inreg arguments alone.
switch (Info.getKind()) {
case ABIArgInfo::InAlloca:
return true;
case ABIArgInfo::Ignore:
case ABIArgInfo::IndirectAliased:
return false;
case ABIArgInfo::Indirect:
case ABIArgInfo::Direct:
case ABIArgInfo::Extend:
return !Info.getInReg();
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
// These are aggregate types which are never passed in registers when
// inalloca is involved.
return true;
}
llvm_unreachable("invalid enum");
}
void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
assert(IsWin32StructABI && "inalloca only supported on win32");
// Build a packed struct type for all of the arguments in memory.
SmallVector<llvm::Type *, 6> FrameFields;
// The stack alignment is always 4.
CharUnits StackAlign = CharUnits::fromQuantity(4);
CharUnits StackOffset;
CGFunctionInfo::arg_iterator I = FI.arg_begin(), E = FI.arg_end();
// Put 'this' into the struct before 'sret', if necessary.
bool IsThisCall =
FI.getCallingConvention() == llvm::CallingConv::X86_ThisCall;
ABIArgInfo &Ret = FI.getReturnInfo();
if (Ret.isIndirect() && Ret.isSRetAfterThis() && !IsThisCall &&
isArgInAlloca(I->info)) {
addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
++I;
}
// Put the sret parameter into the inalloca struct if it's in memory.
if (Ret.isIndirect() && !Ret.getInReg()) {
addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
// On Windows, the hidden sret parameter is always returned in eax.
Ret.setInAllocaSRet(IsWin32StructABI);
}
// Skip the 'this' parameter in ecx.
if (IsThisCall)
++I;
// Put arguments passed in memory into the struct.
for (; I != E; ++I) {
if (isArgInAlloca(I->info))
addFieldToArgStruct(FrameFields, StackOffset, I->info, I->type);
}
FI.setArgStruct(llvm::StructType::get(getVMContext(), FrameFields,
/*isPacked=*/true),
StackAlign);
}
Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF,
Address VAListAddr, QualType Ty) const {
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
// x86-32 changes the alignment of certain arguments on the stack.
//
// Just messing with TypeInfo like this works because we never pass
// anything indirectly.
TypeInfo.Align = CharUnits::fromQuantity(
getTypeStackAlignInBytes(Ty, TypeInfo.Align.getQuantity()));
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
TypeInfo, CharUnits::fromQuantity(4),
/*AllowHigherAlign*/ true);
}
bool X86_32TargetCodeGenInfo::isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts) {
assert(Triple.getArch() == llvm::Triple::x86);
switch (Opts.getStructReturnConvention()) {
case CodeGenOptions::SRCK_Default:
break;
case CodeGenOptions::SRCK_OnStack: // -fpcc-struct-return
return false;
case CodeGenOptions::SRCK_InRegs: // -freg-struct-return
return true;
}
if (Triple.isOSDarwin() || Triple.isOSIAMCU())
return true;
switch (Triple.getOS()) {
case llvm::Triple::DragonFly:
case llvm::Triple::FreeBSD:
case llvm::Triple::OpenBSD:
case llvm::Triple::Win32:
return true;
default:
return false;
}
}
static void addX86InterruptAttrs(const FunctionDecl *FD, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) {
if (!FD->hasAttr<AnyX86InterruptAttr>())
return;
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->setCallingConv(llvm::CallingConv::X86_INTR);
if (FD->getNumParams() == 0)
return;
auto PtrTy = cast<PointerType>(FD->getParamDecl(0)->getType());
llvm::Type *ByValTy = CGM.getTypes().ConvertType(PtrTy->getPointeeType());
llvm::Attribute NewAttr = llvm::Attribute::getWithByValType(
Fn->getContext(), ByValTy);
Fn->addParamAttr(0, NewAttr);
}
void X86_32TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("stackrealign");
}
addX86InterruptAttrs(FD, GV, CGM);
}
}
bool X86_32TargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
CodeGen::CGBuilderTy &Builder = CGF.Builder;
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-7 are the eight integer registers; the order is different
// on Darwin (for EH), but the range is the same.
// 8 is %eip.
AssignToArrayRange(Builder, Address, Four8, 0, 8);
if (CGF.CGM.getTarget().getTriple().isOSDarwin()) {
// 12-16 are st(0..4). Not sure why we stop at 4.
// These have size 16, which is sizeof(long double) on
// platforms with 8-byte alignment for that type.
llvm::Value *Sixteen8 = llvm::ConstantInt::get(CGF.Int8Ty, 16);
AssignToArrayRange(Builder, Address, Sixteen8, 12, 16);
} else {
// 9 is %eflags, which doesn't get a size on Darwin for some
// reason.
Builder.CreateAlignedStore(
Four8, Builder.CreateConstInBoundsGEP1_32(CGF.Int8Ty, Address, 9),
CharUnits::One());
// 11-16 are st(0..5). Not sure why we stop at 5.
// These have size 12, which is sizeof(long double) on
// platforms with 4-byte alignment for that type.
llvm::Value *Twelve8 = llvm::ConstantInt::get(CGF.Int8Ty, 12);
AssignToArrayRange(Builder, Address, Twelve8, 11, 16);
}
return false;
}
//===----------------------------------------------------------------------===//
// X86-64 ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
/// The AVX ABI level for X86 targets.
enum class X86AVXABILevel {
None,
AVX,
AVX512
};
/// \returns the size in bits of the largest (native) vector for \p AVXLevel.
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) {
switch (AVXLevel) {
case X86AVXABILevel::AVX512:
return 512;
case X86AVXABILevel::AVX:
return 256;
case X86AVXABILevel::None:
return 128;
}
llvm_unreachable("Unknown AVXLevel");
}
/// X86_64ABIInfo - The X86_64 ABI information.
class X86_64ABIInfo : public SwiftABIInfo {
enum Class {
Integer = 0,
SSE,
SSEUp,
X87,
X87Up,
ComplexX87,
NoClass,
Memory
};
/// merge - Implement the X86_64 ABI merging algorithm.
///
/// Merge an accumulating classification \arg Accum with a field
/// classification \arg Field.
///
/// \param Accum - The accumulating classification. This should
/// always be either NoClass or the result of a previous merge
/// call. In addition, this should never be Memory (the caller
/// should just return Memory for the aggregate).
static Class merge(Class Accum, Class Field);
/// postMerge - Implement the X86_64 ABI post merging algorithm.
///
/// Post merger cleanup, reduces a malformed Hi and Lo pair to
/// final MEMORY or SSE classes when necessary.
///
/// \param AggregateSize - The size of the current aggregate in
/// the classification process.
///
/// \param Lo - The classification for the parts of the type
/// residing in the low word of the containing object.
///
/// \param Hi - The classification for the parts of the type
/// residing in the higher words of the containing object.
///
void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const;
/// classify - Determine the x86_64 register classes in which the
/// given type T should be passed.
///
/// \param Lo - The classification for the parts of the type
/// residing in the low word of the containing object.
///
/// \param Hi - The classification for the parts of the type
/// residing in the high word of the containing object.
///
/// \param OffsetBase - The bit offset of this type in the
/// containing object. Some parameters are classified differently
/// depending on whether they straddle an eightbyte boundary.
///
/// \param isNamedArg - Whether the argument in question is a "named"
/// argument, as used in AMD64-ABI 3.5.7.
///
/// \param IsRegCall - Whether the calling convention is regcall.
///
/// If a word is unused its result will be NoClass; if a type should
/// be passed in Memory then at least the classification of \arg Lo
/// will be Memory.
///
/// The \arg Lo class will be NoClass iff the argument is ignored.
///
/// If the \arg Lo class is ComplexX87, then the \arg Hi class will
/// also be ComplexX87.
void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi,
bool isNamedArg, bool IsRegCall = false) const;
llvm::Type *GetByteVectorType(QualType Ty) const;
llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType,
unsigned IROffset, QualType SourceTy,
unsigned SourceOffset) const;
llvm::Type *GetINTEGERTypeAtOffset(llvm::Type *IRType,
unsigned IROffset, QualType SourceTy,
unsigned SourceOffset) const;
/// getIndirectReturnResult - Given a source type \arg Ty, return a suitable
/// result such that the value will be returned in memory.
ABIArgInfo getIndirectReturnResult(QualType Ty) const;
/// getIndirectResult - Given a source type \arg Ty, return a suitable result
/// such that the argument will be passed in memory.
///
/// \param freeIntRegs - The number of free integer registers remaining
/// available.
ABIArgInfo getIndirectResult(QualType Ty, unsigned freeIntRegs) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs,
unsigned &neededInt, unsigned &neededSSE,
bool isNamedArg,
bool IsRegCall = false) const;
ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const;
ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const;
bool IsIllegalVectorType(QualType Ty) const;
/// The 0.98 ABI revision clarified a lot of ambiguities,
/// unfortunately in ways that were not always consistent with
/// certain previous compilers. In particular, platforms which
/// required strict binary compatibility with older versions of GCC
/// may need to exempt themselves.
bool honorsRevision0_98() const {
return !getTarget().getTriple().isOSDarwin();
}
/// GCC classifies <1 x long long> as SSE but some platform ABIs choose to
/// classify it as INTEGER (for compatibility with older clang compilers).
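/// For example (illustrative), a first named argument of type
///   typedef long long mmx_t __attribute__((vector_size(8)));
/// is passed in %xmm0 when this returns true and in %rdi when it returns
/// false.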
bool classifyIntegerMMXAsSSE() const {
// Clang <= 3.8 did not do this.
if (getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver3_8)
return false;
const llvm::Triple &Triple = getTarget().getTriple();
if (Triple.isOSDarwin() || Triple.isPS())
return false;
if (Triple.isOSFreeBSD() && Triple.getOSMajorVersion() >= 10)
return false;
return true;
}
// GCC classifies vectors of __int128 as memory.
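// e.g. (illustrative): a typedef __int128 v2q __attribute__((vector_size(32)))
// argument is passed in memory on the targets below.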
bool passInt128VectorsInMem() const {
// Clang <= 9.0 did not do this.
if (getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver9)
return false;
const llvm::Triple &T = getTarget().getTriple();
return T.isOSLinux() || T.isOSNetBSD();
}
X86AVXABILevel AVXLevel;
// Some ABIs (e.g. X32 ABI and Native Client OS) use 32-bit pointers on
// 64-bit hardware.
bool Has64BitPointers;
public:
X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) :
SwiftABIInfo(CGT), AVXLevel(AVXLevel),
Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) {
}
bool isPassedUsingAVXType(QualType type) const {
unsigned neededInt, neededSSE;
// The freeIntRegs argument doesn't matter here.
ABIArgInfo info = classifyArgumentType(type, 0, neededInt, neededSSE,
/*isNamedArg*/true);
if (info.isDirect()) {
llvm::Type *ty = info.getCoerceToType();
if (llvm::VectorType *vectorTy = dyn_cast_or_null<llvm::VectorType>(ty))
return vectorTy->getPrimitiveSizeInBits().getFixedSize() > 128;
}
return false;
}
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool has64BitPointers() const {
return Has64BitPointers;
}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
};
/// WinX86_64ABIInfo - The Windows X86_64 ABI information.
class WinX86_64ABIInfo : public SwiftABIInfo {
public:
WinX86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
: SwiftABIInfo(CGT), AVXLevel(AVXLevel),
IsMingw64(getTarget().getTriple().isWindowsGNUEnvironment()) {}
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool isHomogeneousAggregateBaseType(QualType Ty) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorTypeForVectorCall(getContext(), Ty);
}
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t NumMembers) const override {
// FIXME: Assumes vectorcall is in use.
return isX86VectorCallAggregateSmallEnough(NumMembers);
}
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type *> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
private:
ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
bool IsVectorCall, bool IsRegCall) const;
ABIArgInfo reclassifyHvaArgForVectorCall(QualType Ty, unsigned &FreeSSERegs,
const ABIArgInfo &current) const;
X86AVXABILevel AVXLevel;
bool IsMingw64;
};
class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel)
: TargetCodeGenInfo(std::make_unique<X86_64ABIInfo>(CGT, AVXLevel)) {}
const X86_64ABIInfo &getABIInfo() const {
return static_cast<const X86_64ABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
/// Disable tail call on x86-64. The epilogue code before the tail jump blocks
/// autoreleaseRV/retainRV and autoreleaseRV/unsafeClaimRV optimizations.
bool markARCOptimizedReturnCallsAsNoTail() const override { return true; }
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
// 0-15 are the 16 integer registers.
// 16 is %rip.
AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
return false;
}
llvm::Type* adjustInlineAsmType(CodeGen::CodeGenFunction &CGF,
StringRef Constraint,
llvm::Type* Ty) const override {
return X86AdjustInlineAsmType(CGF, Constraint, Ty);
}
bool isNoProtoCallVariadic(const CallArgList &args,
const FunctionNoProtoType *fnType) const override {
// The default CC on x86-64 sets %al to the number of SSE
// registers used, and GCC sets this when calling an unprototyped
// function, so we override the default behavior. However, don't do
// that when AVX types are involved: the ABI explicitly states it is
// undefined, and it doesn't work in practice because of how the ABI
// defines varargs anyway.
if (fnType->getCallConv() == CC_C) {
bool HasAVXType = false;
for (CallArgList::const_iterator
it = args.begin(), ie = args.end(); it != ie; ++it) {
if (getABIInfo().isPassedUsingAVXType(it->Ty)) {
HasAVXType = true;
break;
}
}
if (!HasAVXType)
return true;
}
return TargetCodeGenInfo::isNoProtoCallVariadic(args, fnType);
}
llvm::Constant *
getUBSanFunctionSignature(CodeGen::CodeGenModule &CGM) const override {
unsigned Sig = (0xeb << 0) | // jmp rel8
(0x06 << 8) | // .+0x08
('v' << 16) |
('2' << 24);
return llvm::ConstantInt::get(CGM.Int32Ty, Sig);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("stackrealign");
}
addX86InterruptAttrs(FD, GV, CGM);
}
}
void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
const FunctionDecl *Caller,
const FunctionDecl *Callee,
const CallArgList &Args) const override;
};
static void initFeatureMaps(const ASTContext &Ctx,
llvm::StringMap<bool> &CallerMap,
const FunctionDecl *Caller,
llvm::StringMap<bool> &CalleeMap,
const FunctionDecl *Callee) {
if (CalleeMap.empty() && CallerMap.empty()) {
// The caller is potentially nullptr in the case where the call isn't in a
// function. In this case, getFunctionFeatureMap ensures we just get the
// TU-level setting (since it cannot be modified by 'target').
Ctx.getFunctionFeatureMap(CallerMap, Caller);
Ctx.getFunctionFeatureMap(CalleeMap, Callee);
}
}
static bool checkAVXParamFeature(DiagnosticsEngine &Diag,
SourceLocation CallLoc,
const llvm::StringMap<bool> &CallerMap,
const llvm::StringMap<bool> &CalleeMap,
QualType Ty, StringRef Feature,
bool IsArgument) {
bool CallerHasFeat = CallerMap.lookup(Feature);
bool CalleeHasFeat = CalleeMap.lookup(Feature);
if (!CallerHasFeat && !CalleeHasFeat)
return Diag.Report(CallLoc, diag::warn_avx_calling_convention)
<< IsArgument << Ty << Feature;
// Mixing calling conventions here is very clearly an error.
if (!CallerHasFeat || !CalleeHasFeat)
return Diag.Report(CallLoc, diag::err_avx_calling_convention)
<< IsArgument << Ty << Feature;
// Else, both caller and callee have the required feature, so there is no need
// to diagnose.
return false;
}
static bool checkAVXParam(DiagnosticsEngine &Diag, ASTContext &Ctx,
SourceLocation CallLoc,
const llvm::StringMap<bool> &CallerMap,
const llvm::StringMap<bool> &CalleeMap, QualType Ty,
bool IsArgument) {
uint64_t Size = Ctx.getTypeSize(Ty);
if (Size > 256)
return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty,
"avx512f", IsArgument);
if (Size > 128)
return checkAVXParamFeature(Diag, CallLoc, CallerMap, CalleeMap, Ty, "avx",
IsArgument);
return false;
}
void X86_64TargetCodeGenInfo::checkFunctionCallABI(
CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
const FunctionDecl *Callee, const CallArgList &Args) const {
llvm::StringMap<bool> CallerMap;
llvm::StringMap<bool> CalleeMap;
unsigned ArgIndex = 0;
// We need to loop through the actual call arguments rather than the
// function's parameters, in case this is variadic.
for (const CallArg &Arg : Args) {
// The "avx" feature changes how vectors >128 in size are passed. "avx512f"
// additionally changes how vectors >256 in size are passed. Like GCC, we
// warn when a function is called with an argument where this will change.
// Unlike GCC, we also error when it is an obvious ABI mismatch, that is,
// the caller and callee features are mismatched.
// Unfortunately, we cannot do this diagnostic in SEMA, since the callee can
// change its ABI with attribute-target after this call.
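// e.g. (illustrative): passing a __m256 argument when neither caller nor
// callee has the "avx" feature produces the warning; a caller/callee
// feature mismatch produces the hard error.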
if (Arg.getType()->isVectorType() &&
CGM.getContext().getTypeSize(Arg.getType()) > 128) {
initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
QualType Ty = Arg.getType();
// The CallArg seems to have desugared the type already, so for clearer
// diagnostics, replace it with the type in the FunctionDecl if possible.
if (ArgIndex < Callee->getNumParams())
Ty = Callee->getParamDecl(ArgIndex)->getType();
if (checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
CalleeMap, Ty, /*IsArgument*/ true))
return;
}
++ArgIndex;
}
// Check return always, as we don't have a good way of knowing in codegen
// whether this value is used, tail-called, etc.
if (Callee->getReturnType()->isVectorType() &&
CGM.getContext().getTypeSize(Callee->getReturnType()) > 128) {
initFeatureMaps(CGM.getContext(), CallerMap, Caller, CalleeMap, Callee);
checkAVXParam(CGM.getDiags(), CGM.getContext(), CallLoc, CallerMap,
CalleeMap, Callee->getReturnType(),
/*IsArgument*/ false);
}
}
static std::string qualifyWindowsLibrary(llvm::StringRef Lib) {
// If the argument does not end in .lib, automatically add the suffix.
// If the argument contains a space, enclose it in quotes.
// This matches the behavior of MSVC.
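// e.g. (illustrative): "msvcrt" becomes "msvcrt.lib" and "my lib" becomes
// "\"my lib.lib\"".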
bool Quote = Lib.contains(' ');
std::string ArgStr = Quote ? "\"" : "";
ArgStr += Lib;
if (!Lib.endswith_insensitive(".lib") && !Lib.endswith_insensitive(".a"))
ArgStr += ".lib";
ArgStr += Quote ? "\"" : "";
return ArgStr;
}
class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo {
public:
WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI,
unsigned NumRegisterParameters)
: X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI,
Win32StructABI, NumRegisterParameters, false) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:";
Opt += qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
static void addStackProbeTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) {
if (llvm::Function *Fn = dyn_cast_or_null<llvm::Function>(GV)) {
if (CGM.getCodeGenOpts().StackProbeSize != 4096)
Fn->addFnAttr("stack-probe-size",
llvm::utostr(CGM.getCodeGenOpts().StackProbeSize));
if (CGM.getCodeGenOpts().NoStackArgProbe)
Fn->addFnAttr("no-stack-arg-probe");
}
}
void WinX86_32TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
X86_32TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
addStackProbeTargetAttributes(D, GV, CGM);
}
class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT,
X86AVXABILevel AVXLevel)
: TargetCodeGenInfo(std::make_unique<WinX86_64ABIInfo>(CGT, AVXLevel)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 7;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
llvm::Value *Eight8 = llvm::ConstantInt::get(CGF.Int8Ty, 8);
// 0-15 are the 16 integer registers.
// 16 is %rip.
AssignToArrayRange(CGF.Builder, Address, Eight8, 0, 16);
return false;
}
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:";
Opt += qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name,
llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
void WinX86_64TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (FD->hasAttr<X86ForceAlignArgPointerAttr>()) {
llvm::Function *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("stackrealign");
}
addX86InterruptAttrs(FD, GV, CGM);
}
addStackProbeTargetAttributes(D, GV, CGM);
}
}
void X86_64ABIInfo::postMerge(unsigned AggregateSize, Class &Lo,
Class &Hi) const {
// AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done:
//
// (a) If one of the classes is Memory, the whole argument is passed in
// memory.
//
// (b) If X87UP is not preceded by X87, the whole argument is passed in
// memory.
//
// (c) If the size of the aggregate exceeds two eightbytes and the first
// eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole
// argument is passed in memory. NOTE: This is necessary to keep the
// ABI working for processors that don't support the __m256 type.
//
// (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE.
//
// Some of these are enforced by the merging logic. Others can arise
// only with unions; for example:
// union { _Complex double; unsigned; }
//
// Note that clauses (b) and (c) were added in 0.98.
//
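// For example (illustrative): union { __m128 v; long l; } merges to
// (Lo=Integer, Hi=SSEUp); rule (d) then converts Hi to SSE because the
// SSEUP eightbyte is no longer preceded by SSE.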
if (Hi == Memory)
Lo = Memory;
if (Hi == X87Up && Lo != X87 && honorsRevision0_98())
Lo = Memory;
if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp))
Lo = Memory;
if (Hi == SSEUp && Lo != SSE)
Hi = SSE;
}
X86_64ABIInfo::Class X86_64ABIInfo::merge(Class Accum, Class Field) {
// AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is
// classified recursively so that always two fields are
// considered. The resulting class is calculated according to
// the classes of the fields in the eightbyte:
//
// (a) If both classes are equal, this is the resulting class.
//
// (b) If one of the classes is NO_CLASS, the resulting class is
// the other class.
//
// (c) If one of the classes is MEMORY, the result is the MEMORY
// class.
//
// (d) If one of the classes is INTEGER, the result is the
// INTEGER.
//
// (e) If one of the classes is X87, X87UP, COMPLEX_X87 class,
// MEMORY is used as class.
//
// (f) Otherwise class SSE is used.
// Accum should never be memory (we should have returned) or
// ComplexX87 (because this cannot be passed in a structure).
assert((Accum != Memory && Accum != ComplexX87) &&
"Invalid accumulated classification during merge.");
if (Accum == Field || Field == NoClass)
return Accum;
if (Field == Memory)
return Memory;
if (Accum == NoClass)
return Field;
if (Accum == Integer || Field == Integer)
return Integer;
if (Field == X87 || Field == X87Up || Field == ComplexX87 ||
Accum == X87 || Accum == X87Up)
return Memory;
return SSE;
}
void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo,
Class &Hi, bool isNamedArg, bool IsRegCall) const {
// FIXME: This code can be simplified by introducing a simple value class for
// Class pairs with appropriate constructor methods for the various
// situations.
// FIXME: Some of the split computations are wrong; unaligned vectors
// shouldn't be passed in registers for example, so there is no chance they
// can straddle an eightbyte. Verify & simplify.
Lo = Hi = NoClass;
Class &Current = OffsetBase < 64 ? Lo : Hi;
Current = Memory;
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
BuiltinType::Kind k = BT->getKind();
if (k == BuiltinType::Void) {
Current = NoClass;
} else if (k == BuiltinType::Int128 || k == BuiltinType::UInt128) {
Lo = Integer;
Hi = Integer;
} else if (k >= BuiltinType::Bool && k <= BuiltinType::LongLong) {
Current = Integer;
} else if (k == BuiltinType::Float || k == BuiltinType::Double ||
k == BuiltinType::Float16 || k == BuiltinType::BFloat16) {
Current = SSE;
} else if (k == BuiltinType::LongDouble) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
if (LDF == &llvm::APFloat::IEEEquad()) {
Lo = SSE;
Hi = SSEUp;
} else if (LDF == &llvm::APFloat::x87DoubleExtended()) {
Lo = X87;
Hi = X87Up;
} else if (LDF == &llvm::APFloat::IEEEdouble()) {
Current = SSE;
} else
llvm_unreachable("unexpected long double representation!");
}
// FIXME: _Decimal32 and _Decimal64 are SSE.
// FIXME: _float128 and _Decimal128 are (SSE, SSEUp).
return;
}
if (const EnumType *ET = Ty->getAs<EnumType>()) {
// Classify the underlying integer type.
classify(ET->getDecl()->getIntegerType(), OffsetBase, Lo, Hi, isNamedArg);
return;
}
if (Ty->hasPointerRepresentation()) {
Current = Integer;
return;
}
if (Ty->isMemberPointerType()) {
if (Ty->isMemberFunctionPointerType()) {
if (Has64BitPointers) {
// If Has64BitPointers, this is an {i64, i64}, so classify both
// Lo and Hi now.
Lo = Hi = Integer;
} else {
// Otherwise, with 32-bit pointers, this is an {i32, i32}. If that
// straddles an eightbyte boundary, Hi should be classified as well.
uint64_t EB_FuncPtr = (OffsetBase) / 64;
uint64_t EB_ThisAdj = (OffsetBase + 64 - 1) / 64;
if (EB_FuncPtr != EB_ThisAdj) {
Lo = Hi = Integer;
} else {
Current = Integer;
}
}
} else {
Current = Integer;
}
return;
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
uint64_t Size = getContext().getTypeSize(VT);
if (Size == 1 || Size == 8 || Size == 16 || Size == 32) {
// gcc passes the following as integer:
// 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float>
// 2 bytes - <2 x char>, <1 x short>
// 1 byte - <1 x char>
Current = Integer;
// If this type crosses an eightbyte boundary, it should be
// split.
uint64_t EB_Lo = (OffsetBase) / 64;
uint64_t EB_Hi = (OffsetBase + Size - 1) / 64;
if (EB_Lo != EB_Hi)
Hi = Lo;
} else if (Size == 64) {
QualType ElementType = VT->getElementType();
// gcc passes <1 x double> in memory. :(
if (ElementType->isSpecificBuiltinType(BuiltinType::Double))
return;
// gcc passes <1 x long long> as SSE but clang used to unconditionally
// pass them as integer. For platforms where clang is the de facto
// platform compiler, we must continue to use integer.
if (!classifyIntegerMMXAsSSE() &&
(ElementType->isSpecificBuiltinType(BuiltinType::LongLong) ||
ElementType->isSpecificBuiltinType(BuiltinType::ULongLong) ||
ElementType->isSpecificBuiltinType(BuiltinType::Long) ||
ElementType->isSpecificBuiltinType(BuiltinType::ULong)))
Current = Integer;
else
Current = SSE;
// If this type crosses an eightbyte boundary, it should be
// split.
if (OffsetBase && OffsetBase != 64)
Hi = Lo;
} else if (Size == 128 ||
(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
QualType ElementType = VT->getElementType();
// gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
if (passInt128VectorsInMem() && Size != 128 &&
(ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
return;
// Arguments of 256-bits are split into four eightbyte chunks. The
// least significant one belongs to class SSE and all the others to class
// SSEUP. The original Lo and Hi design considers that types can't be
// greater than 128-bits, so a 64-bit split in Hi and Lo makes sense.
// This design isn't correct for 256-bits, but since there are no cases
// where the upper parts would need to be inspected, avoid adding
// complexity and just consider Hi to match the 64-256 part.
//
// Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in
// registers if they are "named", i.e. not part of the "..." of a
// variadic function.
//
// Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are
// split into eight eightbyte chunks, one SSE and seven SSEUP.
Lo = SSE;
Hi = SSEUp;
}
return;
}
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
QualType ET = getContext().getCanonicalType(CT->getElementType());
uint64_t Size = getContext().getTypeSize(Ty);
if (ET->isIntegralOrEnumerationType()) {
if (Size <= 64)
Current = Integer;
else if (Size <= 128)
Lo = Hi = Integer;
} else if (ET->isFloat16Type() || ET == getContext().FloatTy ||
ET->isBFloat16Type()) {
Current = SSE;
} else if (ET == getContext().DoubleTy) {
Lo = Hi = SSE;
} else if (ET == getContext().LongDoubleTy) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
if (LDF == &llvm::APFloat::IEEEquad())
Current = Memory;
else if (LDF == &llvm::APFloat::x87DoubleExtended())
Current = ComplexX87;
else if (LDF == &llvm::APFloat::IEEEdouble())
Lo = Hi = SSE;
else
llvm_unreachable("unexpected long double representation!");
}
// If this complex type crosses an eightbyte boundary then it
// should be split.
uint64_t EB_Real = (OffsetBase) / 64;
uint64_t EB_Imag = (OffsetBase + getContext().getTypeSize(ET)) / 64;
if (Hi == NoClass && EB_Real != EB_Imag)
Hi = Lo;
return;
}
if (const auto *EITy = Ty->getAs<BitIntType>()) {
if (EITy->getNumBits() <= 64)
Current = Integer;
else if (EITy->getNumBits() <= 128)
Lo = Hi = Integer;
// Larger values need to get passed in memory.
return;
}
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
// Arrays are treated like structures.
uint64_t Size = getContext().getTypeSize(Ty);
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
// than eight eightbytes, ..., it has class MEMORY.
// regcall ABI doesn't have limitation to an object. The only limitation
// is the free registers, which will be checked in computeInfo.
if (!IsRegCall && Size > 512)
return;
// AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned
// fields, it has class MEMORY.
//
// Only need to check alignment of array base.
if (OffsetBase % getContext().getTypeAlign(AT->getElementType()))
return;
// Otherwise implement simplified merge. We could be smarter about
// this, but it isn't worth it and would be harder to verify.
Current = NoClass;
uint64_t EltSize = getContext().getTypeSize(AT->getElementType());
uint64_t ArraySize = AT->getSize().getZExtValue();
// The only case a 256-bit wide vector could be used is when the array
// contains a single 256-bit element. Since Lo and Hi logic isn't extended
// to work for sizes wider than 128, early check and fallback to memory.
//
if (Size > 128 &&
(Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel)))
return;
for (uint64_t i=0, Offset=OffsetBase; i<ArraySize; ++i, Offset += EltSize) {
Class FieldLo, FieldHi;
classify(AT->getElementType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory)
break;
}
postMerge(Size, Lo, Hi);
assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification.");
return;
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
uint64_t Size = getContext().getTypeSize(Ty);
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger
// than eight eightbytes, ..., it has class MEMORY.
if (Size > 512)
return;
// AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial
// copy constructor or a non-trivial destructor, it is passed by invisible
// reference.
if (getRecordArgABI(RT, getCXXABI()))
return;
const RecordDecl *RD = RT->getDecl();
// Assume variable sized types are passed in memory.
if (RD->hasFlexibleArrayMember())
return;
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
// Reset Lo class, this will be recomputed.
Current = NoClass;
// If this is a C++ record, classify the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CXXRD->bases()) {
assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
const auto *Base =
cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// Classify this field.
//
// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a
// single eightbyte, each is classified separately. Each eightbyte gets
// initialized to class NO_CLASS.
Class FieldLo, FieldHi;
uint64_t Offset =
OffsetBase + getContext().toBits(Layout.getBaseClassOffset(Base));
classify(I.getType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory) {
postMerge(Size, Lo, Hi);
return;
}
}
}
// Classify the fields one at a time, merging the results.
unsigned idx = 0;
bool UseClang11Compat = getContext().getLangOpts().getClangABICompat() <=
LangOptions::ClangABI::Ver11 ||
getContext().getTargetInfo().getTriple().isPS();
bool IsUnion = RT->isUnionType() && !UseClang11Compat;
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
bool BitField = i->isBitField();
// Ignore padding bit-fields.
if (BitField && i->isUnnamedBitfield())
continue;
// AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger than
// eight eightbytes, or it contains unaligned fields, it has class MEMORY.
//
// The only case a 256-bit or a 512-bit wide vector could be used is when
// the struct contains a single 256-bit or 512-bit element. Early check
// and fallback to memory.
//
// FIXME: Extend the Lo and Hi logic properly to work for sizes wider
// than 128.
if (Size > 128 &&
((!IsUnion && Size != getContext().getTypeSize(i->getType())) ||
Size > getNativeVectorSizeForAVXABI(AVXLevel))) {
Lo = Memory;
postMerge(Size, Lo, Hi);
return;
}
// Note, skip this test for bit-fields, see below.
if (!BitField && Offset % getContext().getTypeAlign(i->getType())) {
Lo = Memory;
postMerge(Size, Lo, Hi);
return;
}
// Classify this field.
//
// AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate
// exceeds a single eightbyte, each is classified
// separately. Each eightbyte gets initialized to class
// NO_CLASS.
Class FieldLo, FieldHi;
// Bit-fields require special handling, they do not force the
// structure to be passed in memory even if unaligned, and
// therefore they can straddle an eightbyte.
if (BitField) {
assert(!i->isUnnamedBitfield());
uint64_t Offset = OffsetBase + Layout.getFieldOffset(idx);
uint64_t Size = i->getBitWidthValue(getContext());
uint64_t EB_Lo = Offset / 64;
uint64_t EB_Hi = (Offset + Size - 1) / 64;
if (EB_Lo) {
assert(EB_Hi == EB_Lo && "Invalid classification, type > 16 bytes.");
FieldLo = NoClass;
FieldHi = Integer;
} else {
FieldLo = Integer;
FieldHi = EB_Hi ? Integer : NoClass;
}
} else
classify(i->getType(), Offset, FieldLo, FieldHi, isNamedArg);
Lo = merge(Lo, FieldLo);
Hi = merge(Hi, FieldHi);
if (Lo == Memory || Hi == Memory)
break;
}
postMerge(Size, Lo, Hi);
}
}
ABIArgInfo X86_64ABIInfo::getIndirectReturnResult(QualType Ty) const {
// If this is a scalar LLVM value then assume LLVM will pass it in the right
// place naturally.
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
if (Ty->isBitIntType())
return getNaturalAlignIndirect(Ty);
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
return getNaturalAlignIndirect(Ty);
}
bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
if (const VectorType *VecTy = Ty->getAs<VectorType>()) {
uint64_t Size = getContext().getTypeSize(VecTy);
unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
if (Size <= 64 || Size > LargestVector)
return true;
QualType EltTy = VecTy->getElementType();
if (passInt128VectorsInMem() &&
(EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
return true;
}
return false;
}
ABIArgInfo X86_64ABIInfo::getIndirectResult(QualType Ty,
unsigned freeIntRegs) const {
// If this is a scalar LLVM value then assume LLVM will pass it in the right
// place naturally.
//
// This assumption is optimistic, as there could be free registers available
// when we need to pass this argument in memory, and LLVM could try to pass
// the argument in the free register. This does not seem to happen currently,
// but this code would be much safer if we could mark the argument with
// 'onstack'. See PR12193.
if (!isAggregateTypeForABI(Ty) && !IsIllegalVectorType(Ty) &&
!Ty->isBitIntType()) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Compute the byval alignment. We specify the alignment of the byval in all
// cases so that the mid-level optimizer knows the alignment of the byval.
unsigned Align = std::max(getContext().getTypeAlign(Ty) / 8, 8U);
// Attempt to avoid passing indirect results using byval when possible. This
// is important for good codegen.
//
// We do this by coercing the value into a scalar type which the backend can
// handle naturally (i.e., without using byval).
//
// For simplicity, we currently only do this when we have exhausted all of the
// free integer registers. Doing this when there are free integer registers
// would require more care, as we would have to ensure that the coerced value
// did not claim the unused register. That would require either reordering the
// arguments to the function (so that any subsequent inreg values came first),
// or only doing this optimization when there were no following arguments that
// might be inreg.
//
// We currently expect it to be rare (particularly in well written code) for
// arguments to be passed on the stack when there are still free integer
// registers available (this would typically imply large structs being passed
// by value), so this seems like a fair tradeoff for now.
//
// We can revisit this if the backend grows support for 'onstack' parameter
// attributes. See PR12193.
if (freeIntRegs == 0) {
uint64_t Size = getContext().getTypeSize(Ty);
// If this type fits in an eightbyte, coerce it into the matching integral
// type, which will end up on the stack (with alignment 8).
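// e.g. (illustrative): struct { int a, b; } becomes a plain i64 argument
// once all free integer registers have been used.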
if (Align == 8 && Size <= 64)
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
Size));
}
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
}
/// The ABI specifies that a value should be passed in a full vector XMM/YMM
/// register. Pick an LLVM IR type that will be passed as a vector register.
llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
// Wrapper structs/arrays that only contain vectors are passed just like
// vectors; strip them off if present.
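// e.g. (illustrative): struct { __m128 v; } is passed exactly like a bare
// __m128.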
if (const Type *InnerTy = isSingleElementStruct(Ty, getContext()))
Ty = QualType(InnerTy, 0);
llvm::Type *IRType = CGT.ConvertType(Ty);
if (isa<llvm::VectorType>(IRType)) {
// Don't pass vXi128 vectors in their native type, the backend can't
// legalize them.
if (passInt128VectorsInMem() &&
cast<llvm::VectorType>(IRType)->getElementType()->isIntegerTy(128)) {
// Use a vXi64 vector.
uint64_t Size = getContext().getTypeSize(Ty);
return llvm::FixedVectorType::get(llvm::Type::getInt64Ty(getVMContext()),
Size / 64);
}
return IRType;
}
if (IRType->getTypeID() == llvm::Type::FP128TyID)
return IRType;
// We couldn't find the preferred IR vector type for 'Ty'.
uint64_t Size = getContext().getTypeSize(Ty);
assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
// Return a LLVM IR vector type based on the size of 'Ty'.
return llvm::FixedVectorType::get(llvm::Type::getDoubleTy(getVMContext()),
Size / 64);
}
/// BitsContainNoUserData - Return true if the specified [start,end) bit range
/// is known to either be off the end of the specified type or be in
/// alignment padding. The user type specified is known to be at most 128 bits
/// in size, and have passed through X86_64ABIInfo::classify with a successful
/// classification that put one of the two halves in the INTEGER class.
///
/// It is conservatively correct to return false.
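/// For example (illustrative), for struct { double d; int i; } the bit range
/// [96, 128) lies entirely in tail padding, so this returns true for it.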
static bool BitsContainNoUserData(QualType Ty, unsigned StartBit,
unsigned EndBit, ASTContext &Context) {
// If the bytes being queried are off the end of the type, there is no user
// data hiding here. This handles analysis of builtins, vectors and other
// types that don't contain interesting padding.
unsigned TySize = (unsigned)Context.getTypeSize(Ty);
if (TySize <= StartBit)
return true;
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
unsigned EltSize = (unsigned)Context.getTypeSize(AT->getElementType());
unsigned NumElts = (unsigned)AT->getSize().getZExtValue();
// Check each element to see if the element overlaps with the queried range.
for (unsigned i = 0; i != NumElts; ++i) {
// If the element is after the span we care about, then we're done.
unsigned EltOffset = i*EltSize;
if (EltOffset >= EndBit) break;
unsigned EltStart = EltOffset < StartBit ? StartBit-EltOffset :0;
if (!BitsContainNoUserData(AT->getElementType(), EltStart,
EndBit-EltOffset, Context))
return false;
}
// If it overlaps no elements, then it is safe to process as padding.
return true;
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const auto &I : CXXRD->bases()) {
assert(!I.isVirtual() && !I.getType()->isDependentType() &&
"Unexpected base class!");
const auto *Base =
cast<CXXRecordDecl>(I.getType()->castAs<RecordType>()->getDecl());
// If the base is after the span we care about, ignore it.
unsigned BaseOffset = Context.toBits(Layout.getBaseClassOffset(Base));
if (BaseOffset >= EndBit) continue;
unsigned BaseStart = BaseOffset < StartBit ? StartBit-BaseOffset :0;
if (!BitsContainNoUserData(I.getType(), BaseStart,
EndBit-BaseOffset, Context))
return false;
}
}
// Verify that no field has data that overlaps the region of interest. Yes
// this could be sped up a lot by being smarter about queried fields,
// however we're only looking at structs up to 16 bytes, so we don't care
// much.
unsigned idx = 0;
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
unsigned FieldOffset = (unsigned)Layout.getFieldOffset(idx);
// If we found a field after the region we care about, then we're done.
if (FieldOffset >= EndBit) break;
unsigned FieldStart = FieldOffset < StartBit ? StartBit-FieldOffset :0;
if (!BitsContainNoUserData(i->getType(), FieldStart, EndBit-FieldOffset,
Context))
return false;
}
// If nothing in this record overlapped the area of interest, then we're
// clean.
return true;
}
return false;
}
/// getFPTypeAtOffset - Return a floating point type at the specified offset.
static llvm::Type *getFPTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
const llvm::DataLayout &TD) {
if (IROffset == 0 && IRType->isFloatingPointTy())
return IRType;
// If this is a struct, recurse into the field at the specified offset.
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
if (!STy->getNumContainedTypes())
return nullptr;
const llvm::StructLayout *SL = TD.getStructLayout(STy);
unsigned Elt = SL->getElementContainingOffset(IROffset);
IROffset -= SL->getElementOffset(Elt);
return getFPTypeAtOffset(STy->getElementType(Elt), IROffset, TD);
}
// If this is an array, recurse into the field at the specified offset.
if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
llvm::Type *EltTy = ATy->getElementType();
unsigned EltSize = TD.getTypeAllocSize(EltTy);
IROffset -= IROffset / EltSize * EltSize;
return getFPTypeAtOffset(EltTy, IROffset, TD);
}
return nullptr;
}
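// Sketch of how the recursion above resolves offsets (illustrative IR types):
//   getFPTypeAtOffset({ double, float }, 8, TD) --> float   (element 1, offset 0)
//   getFPTypeAtOffset([2 x float],       4, TD) --> float   (offset folded into the element)
//   getFPTypeAtOffset({ i32, float },    0, TD) --> nullptr (i32 is not FP)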
/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the
/// low 8 bytes of an XMM register, corresponding to the SSE class.
llvm::Type *X86_64ABIInfo::
GetSSETypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
const llvm::DataLayout &TD = getDataLayout();
unsigned SourceSize =
(unsigned)getContext().getTypeSize(SourceTy) / 8 - SourceOffset;
llvm::Type *T0 = getFPTypeAtOffset(IRType, IROffset, TD);
if (!T0 || T0->isDoubleTy())
return llvm::Type::getDoubleTy(getVMContext());
// Get the adjacent FP type.
llvm::Type *T1 = nullptr;
unsigned T0Size = TD.getTypeAllocSize(T0);
if (SourceSize > T0Size)
T1 = getFPTypeAtOffset(IRType, IROffset + T0Size, TD);
if (T1 == nullptr) {
// Check if IRType is a half/bfloat followed by a float. The float will be at
// IROffset+4 due to its alignment.
if (T0->is16bitFPTy() && SourceSize > 4)
T1 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
// If we can't get a second FP type, return a simple half or float.
// avx512fp16-abi.c:pr51813_2 shows it works to return float for
// {float, i8} too.
if (T1 == nullptr)
return T0;
}
if (T0->isFloatTy() && T1->isFloatTy())
return llvm::FixedVectorType::get(T0, 2);
if (T0->is16bitFPTy() && T1->is16bitFPTy()) {
llvm::Type *T2 = nullptr;
if (SourceSize > 4)
T2 = getFPTypeAtOffset(IRType, IROffset + 4, TD);
if (T2 == nullptr)
return llvm::FixedVectorType::get(T0, 2);
return llvm::FixedVectorType::get(T0, 4);
}
if (T0->is16bitFPTy() || T1->is16bitFPTy())
return llvm::FixedVectorType::get(llvm::Type::getHalfTy(getVMContext()), 4);
return llvm::Type::getDoubleTy(getVMContext());
}
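// For illustration (assumed source types, not from the ABI text): a struct
//   struct { float x; float y; };
// is lowered to <2 x float> for its eightbyte, while
//   struct { _Float16 a, b, c, d; };
// becomes <4 x half>; if no FP type is found at the offset at all, the
// conservative answer is double.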
/// GetINTEGERTypeAtOffset - The ABI specifies that a value should be passed in
/// an 8-byte GPR. This means that we either have a scalar or we are talking
/// about the high or low part of an up-to-16-byte struct. This routine picks
/// the best LLVM IR type to represent this, which may be i64 or may be anything
/// else that the backend will pass in a GPR that works better (e.g. i8, %foo*,
/// etc).
///
/// PrefType is an LLVM IR type that corresponds to (part of) the IR type for
/// the source type. IROffset is an offset in bytes into the LLVM IR type that
/// the 8-byte value references. PrefType may be null.
///
/// SourceTy is the source-level type for the entire argument. SourceOffset is
/// an offset into this that we're processing (which is always either 0 or 8).
///
llvm::Type *X86_64ABIInfo::
GetINTEGERTypeAtOffset(llvm::Type *IRType, unsigned IROffset,
QualType SourceTy, unsigned SourceOffset) const {
// If we're dealing with an un-offset LLVM IR type, then it means that we're
// returning an 8-byte unit starting with it. See if we can safely use it.
if (IROffset == 0) {
// Pointers and int64s always fill the 8-byte unit.
if ((isa<llvm::PointerType>(IRType) && Has64BitPointers) ||
IRType->isIntegerTy(64))
return IRType;
// If we have a 1/2/4-byte integer, we can use it only if the rest of the
// goodness in the source type is just tail padding. This is allowed to
// kick in for struct {double,int} on the int, but not on
// struct {double,int,int} because we wouldn't return the second int. We
// have to do this analysis on the source type because we can't depend on
// unions being lowered a specific way, etc.
if (IRType->isIntegerTy(8) || IRType->isIntegerTy(16) ||
IRType->isIntegerTy(32) ||
(isa<llvm::PointerType>(IRType) && !Has64BitPointers)) {
unsigned BitWidth = isa<llvm::PointerType>(IRType) ? 32 :
cast<llvm::IntegerType>(IRType)->getBitWidth();
if (BitsContainNoUserData(SourceTy, SourceOffset*8+BitWidth,
SourceOffset*8+64, getContext()))
return IRType;
}
}
if (llvm::StructType *STy = dyn_cast<llvm::StructType>(IRType)) {
// If this is a struct, recurse into the field at the specified offset.
const llvm::StructLayout *SL = getDataLayout().getStructLayout(STy);
if (IROffset < SL->getSizeInBytes()) {
unsigned FieldIdx = SL->getElementContainingOffset(IROffset);
IROffset -= SL->getElementOffset(FieldIdx);
return GetINTEGERTypeAtOffset(STy->getElementType(FieldIdx), IROffset,
SourceTy, SourceOffset);
}
}
if (llvm::ArrayType *ATy = dyn_cast<llvm::ArrayType>(IRType)) {
llvm::Type *EltTy = ATy->getElementType();
unsigned EltSize = getDataLayout().getTypeAllocSize(EltTy);
unsigned EltOffset = IROffset/EltSize*EltSize;
return GetINTEGERTypeAtOffset(EltTy, IROffset-EltOffset, SourceTy,
SourceOffset);
}
// Okay, we don't have any better idea of what to pass, so we pass this in an
// integer register that isn't too big to fit the rest of the struct.
unsigned TySizeInBytes =
(unsigned)getContext().getTypeSizeInChars(SourceTy).getQuantity();
assert(TySizeInBytes != SourceOffset && "Empty field?");
// It is always safe to classify this as an integer type up to i64 that
// isn't larger than the structure.
return llvm::IntegerType::get(getVMContext(),
std::min(TySizeInBytes-SourceOffset, 8U)*8);
}
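// Worked example for the routine above (hypothetical source types): for
//   struct { double d; int i; };
// the high eightbyte is returned as i32, since everything after the int is
// tail padding, whereas for
//   struct { double d; int i; int j; };
// the routine falls through to the final case and returns i64 so that both
// ints are covered.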
/// GetX86_64ByValArgumentPair - Given a high and low type that can ideally
/// be used as elements of a two register pair to pass or return, return a
/// first class aggregate to represent them. For example, if the low part of
/// a by-value argument should be passed as i32* and the high part as float,
/// return {i32*, float}.
static llvm::Type *
GetX86_64ByValArgumentPair(llvm::Type *Lo, llvm::Type *Hi,
const llvm::DataLayout &TD) {
// In order to correctly satisfy the ABI, we need the high part to start
// at offset 8. If the high and low parts we inferred are both 4-byte types
// (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have
// the second element at offset 8. Check for this:
unsigned LoSize = (unsigned)TD.getTypeAllocSize(Lo);
unsigned HiAlign = TD.getABITypeAlignment(Hi);
unsigned HiStart = llvm::alignTo(LoSize, HiAlign);
assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!");
// To handle this, we have to increase the size of the low part so that the
// second element will start at an 8 byte offset. We can't increase the size
// of the second element because it might make us access off the end of the
// struct.
if (HiStart != 8) {
// There are usually two sorts of types the ABI generation code can produce
// for the low part of a pair that aren't 8 bytes in size: floating point
// (half/float) or integer (i8/i16/i32). This can also include pointers when
// they are 32-bit (X32 and NaCl).
// Promote these to a larger type.
if (Lo->isHalfTy() || Lo->isFloatTy())
Lo = llvm::Type::getDoubleTy(Lo->getContext());
else {
assert((Lo->isIntegerTy() || Lo->isPointerTy())
&& "Invalid/unknown lo type");
Lo = llvm::Type::getInt64Ty(Lo->getContext());
}
}
llvm::StructType *Result = llvm::StructType::get(Lo, Hi);
// Verify that the second element is at an 8-byte offset.
assert(TD.getStructLayout(Result)->getElementOffset(1) == 8 &&
"Invalid x86-64 argument pair!");
return Result;
}
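// Illustrative sketch: if the inferred parts are Lo = float and Hi = i32,
// {float, i32} would place the i32 at offset 4, so Lo is promoted to double
// and the returned pair is {double, i32}, which puts the i32 at offset 8 as
// the ABI requires.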
ABIArgInfo X86_64ABIInfo::
classifyReturnType(QualType RetTy) const {
// AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the
// classification algorithm.
X86_64ABIInfo::Class Lo, Hi;
classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true);
// Check some invariants.
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
llvm::Type *ResType = nullptr;
switch (Lo) {
case NoClass:
if (Hi == NoClass)
return ABIArgInfo::getIgnore();
// If the low part is just padding, it takes no register, leave ResType
// null.
assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
"Unknown missing lo part");
break;
case SSEUp:
case X87Up:
llvm_unreachable("Invalid classification for lo word.");
// AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via
// hidden argument.
case Memory:
return getIndirectReturnResult(RetTy);
// AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next
// available register of the sequence %rax, %rdx is used.
case Integer:
ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
// If we have a sign or zero extended integer, make sure to return Extend
// so that the parameter gets the right LLVM IR attributes.
if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (RetTy->isIntegralOrEnumerationType() &&
isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
}
break;
// AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next
// available SSE register of the sequence %xmm0, %xmm1 is used.
case SSE:
ResType = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 0, RetTy, 0);
break;
// AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is
// returned on the X87 stack in %st0 as 80-bit x87 number.
case X87:
ResType = llvm::Type::getX86_FP80Ty(getVMContext());
break;
// AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real
// part of the value is returned in %st0 and the imaginary part in
// %st1.
case ComplexX87:
assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification.");
ResType = llvm::StructType::get(llvm::Type::getX86_FP80Ty(getVMContext()),
llvm::Type::getX86_FP80Ty(getVMContext()));
break;
}
llvm::Type *HighPart = nullptr;
switch (Hi) {
// Memory was handled previously and X87 should
// never occur as a hi class.
case Memory:
case X87:
llvm_unreachable("Invalid classification for hi word.");
case ComplexX87: // Previously handled.
case NoClass:
break;
case Integer:
HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
if (Lo == NoClass) // Return HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;
case SSE:
HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
if (Lo == NoClass) // Return HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;
// AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte
// is passed in the next available eightbyte chunk of the last used
// vector register.
//
// SSEUP should always be preceded by SSE, just widen.
case SSEUp:
assert(Lo == SSE && "Unexpected SSEUp classification.");
ResType = GetByteVectorType(RetTy);
break;
// AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is
// returned together with the previous X87 value in %st0.
case X87Up:
// If X87Up is preceded by X87, we don't need to do
// anything. However, in some cases with unions it may not be
// preceded by X87. In such situations we follow gcc and pass the
// extra bits in an SSE reg.
if (Lo != X87) {
HighPart = GetSSETypeAtOffset(CGT.ConvertType(RetTy), 8, RetTy, 8);
if (Lo == NoClass) // Return HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
}
break;
}
// If a high part was specified, merge it together with the low part. It is
// known to be passed in the high eightbyte of the result. We do this by
// forming a first-class struct aggregate with the high and low parts:
// {low, high}.
if (HighPart)
ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
return ABIArgInfo::getDirect(ResType);
}
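// End-to-end example of the return classification above (illustrative): a
// return type of
//   struct { double d; int i; };
// classifies as Lo = SSE, Hi = Integer; the low half becomes double, the high
// half becomes i32, and the merged coercion type is {double, i32} with the
// i32 at offset 8.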
ABIArgInfo
X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs,
unsigned &neededInt, unsigned &neededSSE,
bool isNamedArg, bool IsRegCall) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
X86_64ABIInfo::Class Lo, Hi;
classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall);
// Check some invariants.
// FIXME: Enforce these by construction.
assert((Hi != Memory || Lo == Memory) && "Invalid memory classification.");
assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification.");
neededInt = 0;
neededSSE = 0;
llvm::Type *ResType = nullptr;
switch (Lo) {
case NoClass:
if (Hi == NoClass)
return ABIArgInfo::getIgnore();
// If the low part is just padding, it takes no register, leave ResType
// null.
assert((Hi == SSE || Hi == Integer || Hi == X87Up) &&
"Unknown missing lo part");
break;
// AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument
// on the stack.
case Memory:
// AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or
// COMPLEX_X87, it is passed in memory.
case X87:
case ComplexX87:
if (getRecordArgABI(Ty, getCXXABI()) == CGCXXABI::RAA_Indirect)
++neededInt;
return getIndirectResult(Ty, freeIntRegs);
case SSEUp:
case X87Up:
llvm_unreachable("Invalid classification for lo word.");
// AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next
// available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8
// and %r9 is used.
case Integer:
++neededInt;
// Pick an 8-byte type based on the preferred type.
ResType = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 0, Ty, 0);
// If we have a sign or zero extended integer, make sure to return Extend
// so that the parameter gets the right LLVM IR attributes.
if (Hi == NoClass && isa<llvm::IntegerType>(ResType)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
if (Ty->isIntegralOrEnumerationType() &&
isPromotableIntegerTypeForABI(Ty))
return ABIArgInfo::getExtend(Ty);
}
break;
// AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next
// available SSE register is used, the registers are taken in the
// order from %xmm0 to %xmm7.
case SSE: {
llvm::Type *IRType = CGT.ConvertType(Ty);
ResType = GetSSETypeAtOffset(IRType, 0, Ty, 0);
++neededSSE;
break;
}
}
llvm::Type *HighPart = nullptr;
switch (Hi) {
// Memory was handled previously, ComplexX87 and X87 should
// never occur as hi classes, and X87Up must be preceded by X87,
// which is passed in memory.
case Memory:
case X87:
case ComplexX87:
llvm_unreachable("Invalid classification for hi word.");
case NoClass: break;
case Integer:
++neededInt;
// Pick an 8-byte type based on the preferred type.
HighPart = GetINTEGERTypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
break;
// X87Up generally doesn't occur here (long double is passed in
// memory), except in situations involving unions.
case X87Up:
case SSE:
HighPart = GetSSETypeAtOffset(CGT.ConvertType(Ty), 8, Ty, 8);
if (Lo == NoClass) // Pass HighPart at offset 8 in memory.
return ABIArgInfo::getDirect(HighPart, 8);
++neededSSE;
break;
// AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the
// eightbyte is passed in the upper half of the last used SSE
// register. This only happens when 128-bit vectors are passed.
case SSEUp:
assert(Lo == SSE && "Unexpected SSEUp classification");
ResType = GetByteVectorType(Ty);
break;
}
// If a high part was specified, merge it together with the low part. It is
// known to be passed in the high eightbyte of the result. We do this by
// forming a first-class struct aggregate with the high and low parts:
// {low, high}.
if (HighPart)
ResType = GetX86_64ByValArgumentPair(ResType, HighPart, getDataLayout());
return ABIArgInfo::getDirect(ResType);
}
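// Sketch of the register accounting above (illustrative): an argument of type
//   struct { int a; double b; };
// needs one GPR for the low eightbyte and one XMM register for the high one
// (neededInt = 1, neededSSE = 1) and is passed directly as {i32, double}.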
ABIArgInfo
X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const {
auto RT = Ty->getAs<RecordType>();
assert(RT && "classifyRegCallStructType only valid with struct types");
if (RT->getDecl()->hasFlexibleArrayMember())
return getIndirectReturnResult(Ty);
// Sum up bases
if (auto CXXRD = dyn_cast<CXXRecordDecl>(RT->getDecl())) {
if (CXXRD->isDynamicClass()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
for (const auto &I : CXXRD->bases())
if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE,
MaxVectorWidth)
.isIndirect()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
}
// Sum up members
for (const auto *FD : RT->getDecl()->fields()) {
QualType MTy = FD->getType();
if (MTy->isRecordType() && !MTy->isUnionType()) {
if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE,
MaxVectorWidth)
.isIndirect()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
} else {
unsigned LocalNeededInt, LocalNeededSSE;
if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE,
true, true)
.isIndirect()) {
NeededInt = NeededSSE = 0;
return getIndirectReturnResult(Ty);
}
if (const auto *AT = getContext().getAsConstantArrayType(MTy))
MTy = AT->getElementType();
if (const auto *VT = MTy->getAs<VectorType>())
if (getContext().getTypeSize(VT) > MaxVectorWidth)
MaxVectorWidth = getContext().getTypeSize(VT);
NeededInt += LocalNeededInt;
NeededSSE += LocalNeededSSE;
}
}
return ABIArgInfo::getDirect();
}
ABIArgInfo
X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt,
unsigned &NeededSSE,
unsigned &MaxVectorWidth) const {
NeededInt = 0;
NeededSSE = 0;
MaxVectorWidth = 0;
return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE,
MaxVectorWidth);
}
void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
const unsigned CallingConv = FI.getCallingConvention();
// It is possible to force the Win64 calling convention on any x86_64 target
// by using __attribute__((ms_abi)). In such a case, to correctly emit Win64
// compatible code, delegate this call to WinX86_64ABIInfo::computeInfo.
if (CallingConv == llvm::CallingConv::Win64) {
WinX86_64ABIInfo Win64ABIInfo(CGT, AVXLevel);
Win64ABIInfo.computeInfo(FI);
return;
}
bool IsRegCall = CallingConv == llvm::CallingConv::X86_RegCall;
// Keep track of the number of assigned registers.
unsigned FreeIntRegs = IsRegCall ? 11 : 6;
unsigned FreeSSERegs = IsRegCall ? 16 : 8;
unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0;
if (!::classifyReturnType(getCXXABI(), FI, *this)) {
if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() &&
!FI.getReturnType()->getTypePtr()->isUnionType()) {
FI.getReturnInfo() = classifyRegCallStructType(
FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth);
if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
FreeIntRegs -= NeededInt;
FreeSSERegs -= NeededSSE;
} else {
FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
}
} else if (IsRegCall && FI.getReturnType()->getAs<ComplexType>() &&
getContext().getCanonicalType(FI.getReturnType()
->getAs<ComplexType>()
->getElementType()) ==
getContext().LongDoubleTy)
// Complex long double is passed in memory when the regcall
// calling convention is used.
FI.getReturnInfo() = getIndirectReturnResult(FI.getReturnType());
else
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
}
// If the return value is indirect, then the hidden argument is consuming one
// integer register.
if (FI.getReturnInfo().isIndirect())
--FreeIntRegs;
else if (NeededSSE && MaxVectorWidth > 0)
FI.setMaxVectorWidth(MaxVectorWidth);
// The chain argument effectively gives us another free register.
if (FI.isChainCall())
++FreeIntRegs;
unsigned NumRequiredArgs = FI.getNumRequiredArgs();
// AMD64-ABI 3.2.3p3: Once arguments are classified, the registers
// get assigned (in left-to-right order) for passing as follows...
unsigned ArgNo = 0;
for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
it != ie; ++it, ++ArgNo) {
bool IsNamedArg = ArgNo < NumRequiredArgs;
if (IsRegCall && it->type->isStructureOrClassType())
it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE,
MaxVectorWidth);
else
it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt,
NeededSSE, IsNamedArg);
// AMD64-ABI 3.2.3p3: If there are no registers available for any
// eightbyte of an argument, the whole argument is passed on the
// stack. If registers have already been assigned for some
// eightbytes of such an argument, the assignments get reverted.
if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) {
FreeIntRegs -= NeededInt;
FreeSSERegs -= NeededSSE;
if (MaxVectorWidth > FI.getMaxVectorWidth())
FI.setMaxVectorWidth(MaxVectorWidth);
} else {
it->info = getIndirectResult(it->type, FreeIntRegs);
}
}
}
static Address EmitX86_64VAArgFromMemory(CodeGenFunction &CGF,
Address VAListAddr, QualType Ty) {
Address overflow_arg_area_p =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_p");
llvm::Value *overflow_arg_area =
CGF.Builder.CreateLoad(overflow_arg_area_p, "overflow_arg_area");
// AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
// byte boundary if alignment needed by type exceeds 8 byte boundary.
// It isn't stated explicitly in the standard, but in practice we use
// alignment greater than 16 where necessary.
CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
if (Align > CharUnits::fromQuantity(8)) {
overflow_arg_area = emitRoundPointerUpToAlignment(CGF, overflow_arg_area,
Align);
}
// AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
llvm::Value *Res =
CGF.Builder.CreateBitCast(overflow_arg_area,
llvm::PointerType::getUnqual(LTy));
// AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
// l->overflow_arg_area + sizeof(type).
// AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
// an 8 byte boundary.
uint64_t SizeInBytes = (CGF.getContext().getTypeSize(Ty) + 7) / 8;
llvm::Value *Offset =
llvm::ConstantInt::get(CGF.Int32Ty, (SizeInBytes + 7) & ~7);
overflow_arg_area = CGF.Builder.CreateGEP(CGF.Int8Ty, overflow_arg_area,
Offset, "overflow_arg_area.next");
CGF.Builder.CreateStore(overflow_arg_area, overflow_arg_area_p);
// AMD64-ABI 3.5.7p5: Step 11. Return the fetched type.
return Address(Res, LTy, Align);
}
Address X86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// Assume that va_list type is correct; should be pointer to LLVM type:
// struct {
// i32 gp_offset;
// i32 fp_offset;
// i8* overflow_arg_area;
// i8* reg_save_area;
// };
unsigned neededInt, neededSSE;
Ty = getContext().getCanonicalType(Ty);
ABIArgInfo AI = classifyArgumentType(Ty, 0, neededInt, neededSSE,
/*isNamedArg*/false);
// AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
// in the registers. If not go to step 7.
if (!neededInt && !neededSSE)
return EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
// AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
// general purpose registers needed to pass type and num_fp to hold
// the number of floating point registers needed.
// AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
// registers. In the case: l->gp_offset > 48 - num_gp * 8 or
// l->fp_offset > 304 - num_fp * 16 go to step 7.
//
// NOTE: 304 is a typo; there are (6 * 8 + 8 * 16) = 176 bytes of
// register save space.
llvm::Value *InRegs = nullptr;
Address gp_offset_p = Address::invalid(), fp_offset_p = Address::invalid();
llvm::Value *gp_offset = nullptr, *fp_offset = nullptr;
if (neededInt) {
gp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "gp_offset_p");
gp_offset = CGF.Builder.CreateLoad(gp_offset_p, "gp_offset");
InRegs = llvm::ConstantInt::get(CGF.Int32Ty, 48 - neededInt * 8);
InRegs = CGF.Builder.CreateICmpULE(gp_offset, InRegs, "fits_in_gp");
}
if (neededSSE) {
fp_offset_p = CGF.Builder.CreateStructGEP(VAListAddr, 1, "fp_offset_p");
fp_offset = CGF.Builder.CreateLoad(fp_offset_p, "fp_offset");
llvm::Value *FitsInFP =
llvm::ConstantInt::get(CGF.Int32Ty, 176 - neededSSE * 16);
FitsInFP = CGF.Builder.CreateICmpULE(fp_offset, FitsInFP, "fits_in_fp");
InRegs = InRegs ? CGF.Builder.CreateAnd(InRegs, FitsInFP) : FitsInFP;
}
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
// Emit code to load the value if it was passed in registers.
CGF.EmitBlock(InRegBlock);
// AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
// an offset of l->gp_offset and/or l->fp_offset. This may require
// copying to a temporary location in case the parameter is passed
// in different register classes or requires an alignment greater
// than 8 for general purpose registers and 16 for XMM registers.
//
// FIXME: This really results in shameful code when we end up needing to
// collect arguments from different places; often what should result in a
// simple assembling of a structure from scattered addresses has many more
// loads than necessary. Can we clean this up?
llvm::Type *LTy = CGF.ConvertTypeForMem(Ty);
llvm::Value *RegSaveArea = CGF.Builder.CreateLoad(
CGF.Builder.CreateStructGEP(VAListAddr, 3), "reg_save_area");
Address RegAddr = Address::invalid();
if (neededInt && neededSSE) {
// FIXME: Cleanup.
assert(AI.isDirect() && "Unexpected ABI info for mixed regs");
llvm::StructType *ST = cast<llvm::StructType>(AI.getCoerceToType());
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
assert(ST->getNumElements() == 2 && "Unexpected ABI info for mixed regs");
llvm::Type *TyLo = ST->getElementType(0);
llvm::Type *TyHi = ST->getElementType(1);
assert((TyLo->isFPOrFPVectorTy() ^ TyHi->isFPOrFPVectorTy()) &&
"Unexpected ABI info for mixed regs");
llvm::Type *PTyLo = llvm::PointerType::getUnqual(TyLo);
llvm::Type *PTyHi = llvm::PointerType::getUnqual(TyHi);
llvm::Value *GPAddr =
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset);
llvm::Value *FPAddr =
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset);
llvm::Value *RegLoAddr = TyLo->isFPOrFPVectorTy() ? FPAddr : GPAddr;
llvm::Value *RegHiAddr = TyLo->isFPOrFPVectorTy() ? GPAddr : FPAddr;
// Copy the first element.
// FIXME: Our choice of alignment here and below is probably pessimistic.
llvm::Value *V = CGF.Builder.CreateAlignedLoad(
TyLo, CGF.Builder.CreateBitCast(RegLoAddr, PTyLo),
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyLo)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
// Copy the second element.
V = CGF.Builder.CreateAlignedLoad(
TyHi, CGF.Builder.CreateBitCast(RegHiAddr, PTyHi),
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(TyHi)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
} else if (neededInt) {
RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, gp_offset),
CGF.Int8Ty, CharUnits::fromQuantity(8));
RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
// Copy to a temporary if necessary to ensure the appropriate alignment.
auto TInfo = getContext().getTypeInfoInChars(Ty);
uint64_t TySize = TInfo.Width.getQuantity();
CharUnits TyAlign = TInfo.Align;
// Copy into a temporary if the type is more aligned than the
// register save area.
if (TyAlign.getQuantity() > 8) {
Address Tmp = CGF.CreateMemTemp(Ty);
CGF.Builder.CreateMemCpy(Tmp, RegAddr, TySize, false);
RegAddr = Tmp;
}
} else if (neededSSE == 1) {
RegAddr = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, fp_offset),
CGF.Int8Ty, CharUnits::fromQuantity(16));
RegAddr = CGF.Builder.CreateElementBitCast(RegAddr, LTy);
} else {
assert(neededSSE == 2 && "Invalid number of needed registers!");
// SSE registers are spaced 16 bytes apart in the register save
// area, so we need to collect the two eightbytes together.
// The ABI isn't explicit about this, but it seems reasonable
// to assume that the slots are 16-byte aligned, since the stack is
// naturally 16-byte aligned and the prologue is expected to store
// all the SSE registers to the RSA.
Address RegAddrLo = Address(CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea,
fp_offset),
CGF.Int8Ty, CharUnits::fromQuantity(16));
Address RegAddrHi =
CGF.Builder.CreateConstInBoundsByteGEP(RegAddrLo,
CharUnits::fromQuantity(16));
llvm::Type *ST = AI.canHaveCoerceToType()
? AI.getCoerceToType()
: llvm::StructType::get(CGF.DoubleTy, CGF.DoubleTy);
llvm::Value *V;
Address Tmp = CGF.CreateMemTemp(Ty);
Tmp = CGF.Builder.CreateElementBitCast(Tmp, ST);
V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
RegAddrLo, ST->getStructElementType(0)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 0));
V = CGF.Builder.CreateLoad(CGF.Builder.CreateElementBitCast(
RegAddrHi, ST->getStructElementType(1)));
CGF.Builder.CreateStore(V, CGF.Builder.CreateStructGEP(Tmp, 1));
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, LTy);
}
// AMD64-ABI 3.5.7p5: Step 5. Set:
// l->gp_offset = l->gp_offset + num_gp * 8
// l->fp_offset = l->fp_offset + num_fp * 16.
if (neededInt) {
llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededInt * 8);
CGF.Builder.CreateStore(CGF.Builder.CreateAdd(gp_offset, Offset),
gp_offset_p);
}
if (neededSSE) {
llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int32Ty, neededSSE * 16);
CGF.Builder.CreateStore(CGF.Builder.CreateAdd(fp_offset, Offset),
fp_offset_p);
}
CGF.EmitBranch(ContBlock);
// Emit code to load the value if it was passed in memory.
CGF.EmitBlock(InMemBlock);
Address MemAddr = EmitX86_64VAArgFromMemory(CGF, VAListAddr, Ty);
// Return the appropriate result.
CGF.EmitBlock(ContBlock);
Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
"vaarg.addr");
return ResAddr;
}
Address X86_64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
uint64_t Width = getContext().getTypeSize(Ty);
bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(8),
/*allowHigherAlign*/ false);
}
ABIArgInfo WinX86_64ABIInfo::reclassifyHvaArgForVectorCall(
QualType Ty, unsigned &FreeSSERegs, const ABIArgInfo &current) const {
const Type *Base = nullptr;
uint64_t NumElts = 0;
if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
FreeSSERegs -= NumElts;
return getDirectX86Hva();
}
return current;
}
ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
bool IsReturnType, bool IsVectorCall,
bool IsRegCall) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
TypeInfo Info = getContext().getTypeInfo(Ty);
uint64_t Width = Info.Width;
CharUnits Align = getContext().toCharUnitsFromBits(Info.Align);
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
if (!IsReturnType) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
}
if (RT->getDecl()->hasFlexibleArrayMember())
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
const Type *Base = nullptr;
uint64_t NumElts = 0;
// vectorcall adds the concept of a homogeneous vector aggregate, similar to
// other targets.
if ((IsVectorCall || IsRegCall) &&
isHomogeneousAggregate(Ty, Base, NumElts)) {
if (IsRegCall) {
if (FreeSSERegs >= NumElts) {
FreeSSERegs -= NumElts;
if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
return ABIArgInfo::getDirect();
return ABIArgInfo::getExpand();
}
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
} else if (IsVectorCall) {
if (FreeSSERegs >= NumElts &&
(IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
FreeSSERegs -= NumElts;
return ABIArgInfo::getDirect();
} else if (IsReturnType) {
return ABIArgInfo::getExpand();
} else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
// HVAs are delayed and reclassified in the 2nd step.
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
}
}
if (Ty->isMemberPointerType()) {
// If the member pointer is represented by an LLVM int or ptr, pass it
// directly.
llvm::Type *LLTy = CGT.ConvertType(Ty);
if (LLTy->isPointerTy() || LLTy->isIntegerTy())
return ABIArgInfo::getDirect();
}
if (RT || Ty->isAnyComplexType() || Ty->isMemberPointerType()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
if (Width > 64 || !llvm::isPowerOf2_64(Width))
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// Otherwise, coerce it to a small integer.
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Width));
}
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
switch (BT->getKind()) {
case BuiltinType::Bool:
// The Bool type is always extended by the ABI; other builtin types are not
// extended.
return ABIArgInfo::getExtend(Ty);
case BuiltinType::LongDouble:
// Mingw64 GCC uses the old 80-bit extended precision floating point
// unit. It passes long doubles indirectly through memory.
if (IsMingw64) {
const llvm::fltSemantics *LDF = &getTarget().getLongDoubleFormat();
if (LDF == &llvm::APFloat::x87DoubleExtended())
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
break;
case BuiltinType::Int128:
case BuiltinType::UInt128:
// If it's a parameter type, the normal ABI rule is that arguments larger
// than 8 bytes are passed indirectly. GCC follows it. We follow it too,
// even though it isn't particularly efficient.
if (!IsReturnType)
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
// Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that.
// Clang matches them for compatibility.
return ABIArgInfo::getDirect(llvm::FixedVectorType::get(
llvm::Type::getInt64Ty(getVMContext()), 2));
default:
break;
}
}
if (Ty->isBitIntType()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
// However, non-power-of-two bit-precise integers will be passed as 1, 2, 4,
// or 8 bytes anyway as long as they fit in them, so we don't have to check
// for a power of 2.
if (Width <= 64)
return ABIArgInfo::getDirect();
return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
}
return ABIArgInfo::getDirect();
}
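// Summary of the Win64 rules above, with illustrative examples: a 16-byte
// struct or an __int128 parameter is passed by reference (it is not 1, 2, 4,
// or 8 bytes), an 8-byte struct is coerced to i64, _BitInt(128) is likewise
// passed indirectly, and _BitInt(24) is passed directly.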
void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
const unsigned CC = FI.getCallingConvention();
bool IsVectorCall = CC == llvm::CallingConv::X86_VectorCall;
bool IsRegCall = CC == llvm::CallingConv::X86_RegCall;
// If __attribute__((sysv_abi)) is in use, use the SysV argument
// classification rules.
if (CC == llvm::CallingConv::X86_64_SysV) {
X86_64ABIInfo SysVABIInfo(CGT, AVXLevel);
SysVABIInfo.computeInfo(FI);
return;
}
unsigned FreeSSERegs = 0;
if (IsVectorCall) {
// We can use up to 4 SSE return registers with vectorcall.
FreeSSERegs = 4;
} else if (IsRegCall) {
// RegCall gives us 16 SSE registers.
FreeSSERegs = 16;
}
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
IsVectorCall, IsRegCall);
if (IsVectorCall) {
// We can use up to 6 SSE register parameters with vectorcall.
FreeSSERegs = 6;
} else if (IsRegCall) {
// RegCall gives us 16 SSE registers, we can reuse the return registers.
FreeSSERegs = 16;
}
unsigned ArgNum = 0;
unsigned ZeroSSERegs = 0;
for (auto &I : FI.arguments()) {
// Vectorcall in x64 only permits the first 6 arguments to be passed in
// XMM/YMM registers. After the sixth argument, pretend no vector
// registers are left.
unsigned *MaybeFreeSSERegs =
(IsVectorCall && ArgNum >= 6) ? &ZeroSSERegs : &FreeSSERegs;
I.info =
classify(I.type, *MaybeFreeSSERegs, false, IsVectorCall, IsRegCall);
++ArgNum;
}
if (IsVectorCall) {
// For vectorcall, assign aggregate HVAs to any free vector registers in a
// second pass.
for (auto &I : FI.arguments())
I.info = reclassifyHvaArgForVectorCall(I.type, FreeSSERegs, I.info);
}
}
Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
uint64_t Width = getContext().getTypeSize(Ty);
bool IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(8),
/*allowHigherAlign*/ false);
}
static bool PPC_initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address, bool Is64Bit,
bool IsAIX) {
// This is calculated from the LLVM and GCC tables and verified
// against gcc output. AFAIK all PPC ABIs use the same encoding.
CodeGen::CGBuilderTy &Builder = CGF.Builder;
llvm::IntegerType *i8 = CGF.Int8Ty;
llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
llvm::Value *Sixteen8 = llvm::ConstantInt::get(i8, 16);
// 0-31: r0-31, the 4-byte or 8-byte general-purpose registers
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 0, 31);
// 32-63: fp0-31, the 8-byte floating-point registers
AssignToArrayRange(Builder, Address, Eight8, 32, 63);
// 64-67 are various 4-byte or 8-byte special-purpose registers:
// 64: mq
// 65: lr
// 66: ctr
// 67: ap
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 64, 67);
// 68-76 are various 4-byte special-purpose registers:
// 68-75 cr0-7
// 76: xer
AssignToArrayRange(Builder, Address, Four8, 68, 76);
// 77-108: v0-31, the 16-byte vector registers
AssignToArrayRange(Builder, Address, Sixteen8, 77, 108);
// 109: vrsave
// 110: vscr
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 109, 110);
// AIX does not utilize the rest of the registers.
if (IsAIX)
return false;
// 111: spe_acc
// 112: spefscr
// 113: sfp
AssignToArrayRange(Builder, Address, Is64Bit ? Eight8 : Four8, 111, 113);
if (!Is64Bit)
return false;
// TODO: Need to verify if these registers are used on 64 bit AIX with Power8
// or above CPU.
// 64-bit only registers:
// 114: tfhar
// 115: tfiar
// 116: texasr
AssignToArrayRange(Builder, Address, Eight8, 114, 116);
return false;
}
// AIX
namespace {
/// AIXABIInfo - The AIX XCOFF ABI information.
class AIXABIInfo : public ABIInfo {
const bool Is64Bit;
const unsigned PtrByteSize;
CharUnits getParamTypeAlignment(QualType Ty) const;
public:
AIXABIInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
: ABIInfo(CGT), Is64Bit(Is64Bit), PtrByteSize(Is64Bit ? 8 : 4) {}
bool isPromotableTypeForABI(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
class AIXTargetCodeGenInfo : public TargetCodeGenInfo {
const bool Is64Bit;
public:
AIXTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool Is64Bit)
: TargetCodeGenInfo(std::make_unique<AIXABIInfo>(CGT, Is64Bit)),
Is64Bit(Is64Bit) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
} // namespace
// Return true if the ABI requires Ty to be passed sign- or zero-
// extended to 32/64 bits.
bool AIXABIInfo::isPromotableTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
if (Ty->isPromotableIntegerType())
return true;
if (!Is64Bit)
return false;
// In 64-bit mode, in addition to the usual promotable integer types, we also
// need to extend all 32-bit types, since the ABI requires promotion to 64
// bits.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Int:
case BuiltinType::UInt:
return true;
default:
break;
}
return false;
}
ABIArgInfo AIXABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect();
if (RetTy->isVectorType())
return ABIArgInfo::getDirect();
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (isAggregateTypeForABI(RetTy))
return getNaturalAlignIndirect(RetTy);
return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
ABIArgInfo AIXABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
if (Ty->isVectorType())
return ABIArgInfo::getDirect();
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
CharUnits CCAlign = getParamTypeAlignment(Ty);
CharUnits TyAlign = getContext().getTypeAlignInChars(Ty);
return ABIArgInfo::getIndirect(CCAlign, /*ByVal*/ true,
/*Realign*/ TyAlign > CCAlign);
}
return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
CharUnits AIXABIInfo::getParamTypeAlignment(QualType Ty) const {
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
if (Ty->isVectorType())
return CharUnits::fromQuantity(16);
// If the structure contains a vector type, the alignment is 16.
if (isRecordWithSIMDVectorType(getContext(), Ty))
return CharUnits::fromQuantity(16);
return CharUnits::fromQuantity(PtrByteSize);
}
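// For example (illustrative): a vector parameter, or a struct containing a
// SIMD vector, is aligned to 16 bytes in the parameter area, while other
// types use the pointer size (4 bytes on 32-bit AIX, 8 bytes on 64-bit AIX).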
Address AIXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
TypeInfo.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(PtrByteSize);
// If we have a complex type and the base type is smaller than the register
// size, the ABI calls for the real and imaginary parts to be right-adjusted
// in separate words in 32-bit mode or doublewords in 64-bit mode. However,
// Clang expects us to produce a pointer to a structure with the two parts
// packed tightly. So generate loads of the real and imaginary parts relative
// to the va_list pointer, and store them to a temporary structure. We do the
// same as the PPC64ABI here.
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
CharUnits EltSize = TypeInfo.Width / 2;
if (EltSize < SlotSize)
return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
}
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false, TypeInfo,
SlotSize, /*AllowHigher*/ true);
}
bool AIXTargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF, llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, Is64Bit, /*IsAIX*/ true);
}
// PowerPC-32
namespace {
/// PPC32_SVR4_ABIInfo - The 32-bit PowerPC ELF (SVR4) ABI information.
class PPC32_SVR4_ABIInfo : public DefaultABIInfo {
bool IsSoftFloatABI;
bool IsRetSmallStructInRegABI;
CharUnits getParamTypeAlignment(QualType Ty) const;
public:
PPC32_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, bool SoftFloatABI,
bool RetSmallStructInRegABI)
: DefaultABIInfo(CGT), IsSoftFloatABI(SoftFloatABI),
IsRetSmallStructInRegABI(RetSmallStructInRegABI) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
class PPC32TargetCodeGenInfo : public TargetCodeGenInfo {
public:
PPC32TargetCodeGenInfo(CodeGenTypes &CGT, bool SoftFloatABI,
bool RetSmallStructInRegABI)
: TargetCodeGenInfo(std::make_unique<PPC32_SVR4_ABIInfo>(
CGT, SoftFloatABI, RetSmallStructInRegABI)) {}
static bool isStructReturnInRegABI(const llvm::Triple &Triple,
const CodeGenOptions &Opts);
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
}
CharUnits PPC32_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
if (Ty->isVectorType())
return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16
: 4);
// For single-element float/vector structs, we consider the whole type
// to have the same alignment requirements as its single element.
const Type *AlignTy = nullptr;
if (const Type *EltType = isSingleElementStruct(Ty, getContext())) {
const BuiltinType *BT = EltType->getAs<BuiltinType>();
if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
(BT && BT->isFloatingPoint()))
AlignTy = EltType;
}
if (AlignTy)
return CharUnits::fromQuantity(AlignTy->isVectorType() ? 16 : 4);
return CharUnits::fromQuantity(4);
}
ABIArgInfo PPC32_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
uint64_t Size;
// -msvr4-struct-return puts small aggregates in GPR3 and GPR4.
if (isAggregateTypeForABI(RetTy) && IsRetSmallStructInRegABI &&
(Size = getContext().getTypeSize(RetTy)) <= 64) {
// System V ABI (1995), page 3-22, specified:
// > A structure or union whose size is less than or equal to 8 bytes
// > shall be returned in r3 and r4, as if it were first stored in the
// > 8-byte aligned memory area and then the low addressed word were
// > loaded into r3 and the high-addressed word into r4. Bits beyond
// > the last member of the structure or union are not defined.
//
// GCC for big-endian PPC32 inserts the pad before the first member,
// not "beyond the last member" of the struct. To stay compatible
// with GCC, we coerce the struct to an integer of the same size.
// LLVM will extend it and return i32 in r3, or i64 in r3:r4.
if (Size == 0)
return ABIArgInfo::getIgnore();
else {
llvm::Type *CoerceTy = llvm::Type::getIntNTy(getVMContext(), Size);
return ABIArgInfo::getDirect(CoerceTy);
}
}
return DefaultABIInfo::classifyReturnType(RetTy);
}
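// Illustrative example of the small-struct return rule above: with
// -msvr4-struct-return, a type such as
//   struct { char a; char b; };
// (16 bits) is coerced to i16 and comes back in r3, while an 8-byte struct is
// coerced to i64 and returned in the r3:r4 pair.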
// TODO: this implementation is now likely redundant with
// DefaultABIInfo::EmitVAArg.
Address PPC32_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAList,
QualType Ty) const {
if (getTarget().getTriple().isOSDarwin()) {
auto TI = getContext().getTypeInfoInChars(Ty);
TI.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(4);
return emitVoidPtrVAArg(CGF, VAList, Ty,
classifyArgumentType(Ty).isIndirect(), TI, SlotSize,
/*AllowHigherAlign=*/true);
}
const unsigned OverflowLimit = 8;
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
// TODO: Implement this. For now ignore.
(void)CTy;
return Address::invalid(); // FIXME?
}
// struct __va_list_tag {
// unsigned char gpr;
// unsigned char fpr;
// unsigned short reserved;
// void *overflow_arg_area;
// void *reg_save_area;
// };
bool isI64 = Ty->isIntegerType() && getContext().getTypeSize(Ty) == 64;
bool isInt = !Ty->isFloatingType();
bool isF64 = Ty->isFloatingType() && getContext().getTypeSize(Ty) == 64;
// All aggregates are passed indirectly? That doesn't seem consistent
// with the argument-lowering code.
bool isIndirect = isAggregateTypeForABI(Ty);
CGBuilderTy &Builder = CGF.Builder;
// The calling convention either uses 1-2 GPRs or 1 FPR.
Address NumRegsAddr = Address::invalid();
if (isInt || IsSoftFloatABI) {
NumRegsAddr = Builder.CreateStructGEP(VAList, 0, "gpr");
} else {
NumRegsAddr = Builder.CreateStructGEP(VAList, 1, "fpr");
}
llvm::Value *NumRegs = Builder.CreateLoad(NumRegsAddr, "numUsedRegs");
// "Align" the register count when TY is i64.
if (isI64 || (isF64 && IsSoftFloatABI)) {
NumRegs = Builder.CreateAdd(NumRegs, Builder.getInt8(1));
NumRegs = Builder.CreateAnd(NumRegs, Builder.getInt8((uint8_t) ~1U));
}
llvm::Value *CC =
Builder.CreateICmpULT(NumRegs, Builder.getInt8(OverflowLimit), "cond");
llvm::BasicBlock *UsingRegs = CGF.createBasicBlock("using_regs");
llvm::BasicBlock *UsingOverflow = CGF.createBasicBlock("using_overflow");
llvm::BasicBlock *Cont = CGF.createBasicBlock("cont");
Builder.CreateCondBr(CC, UsingRegs, UsingOverflow);
llvm::Type *DirectTy = CGF.ConvertType(Ty), *ElementTy = DirectTy;
if (isIndirect) DirectTy = DirectTy->getPointerTo(0);
// Case 1: consume registers.
Address RegAddr = Address::invalid();
{
CGF.EmitBlock(UsingRegs);
Address RegSaveAreaPtr = Builder.CreateStructGEP(VAList, 4);
RegAddr = Address(Builder.CreateLoad(RegSaveAreaPtr), CGF.Int8Ty,
CharUnits::fromQuantity(8));
assert(RegAddr.getElementType() == CGF.Int8Ty);
// Floating-point registers start after the general-purpose registers.
if (!(isInt || IsSoftFloatABI)) {
RegAddr = Builder.CreateConstInBoundsByteGEP(RegAddr,
CharUnits::fromQuantity(32));
}
// Get the address of the saved value by scaling the number of
// registers we've used by the size of each register.
CharUnits RegSize = CharUnits::fromQuantity((isInt || IsSoftFloatABI) ? 4 : 8);
llvm::Value *RegOffset =
Builder.CreateMul(NumRegs, Builder.getInt8(RegSize.getQuantity()));
RegAddr = Address(
Builder.CreateInBoundsGEP(CGF.Int8Ty, RegAddr.getPointer(), RegOffset),
CGF.Int8Ty, RegAddr.getAlignment().alignmentOfArrayElement(RegSize));
RegAddr = Builder.CreateElementBitCast(RegAddr, DirectTy);
// Increase the used-register count.
NumRegs =
Builder.CreateAdd(NumRegs,
Builder.getInt8((isI64 || (isF64 && IsSoftFloatABI)) ? 2 : 1));
Builder.CreateStore(NumRegs, NumRegsAddr);
CGF.EmitBranch(Cont);
}
// Case 2: consume space in the overflow area.
Address MemAddr = Address::invalid();
{
CGF.EmitBlock(UsingOverflow);
Builder.CreateStore(Builder.getInt8(OverflowLimit), NumRegsAddr);
// Everything in the overflow area is rounded up to a size of at least 4.
CharUnits OverflowAreaAlign = CharUnits::fromQuantity(4);
CharUnits Size;
if (!isIndirect) {
auto TypeInfo = CGF.getContext().getTypeInfoInChars(Ty);
Size = TypeInfo.Width.alignTo(OverflowAreaAlign);
} else {
Size = CGF.getPointerSize();
}
Address OverflowAreaAddr = Builder.CreateStructGEP(VAList, 3);
Address OverflowArea =
Address(Builder.CreateLoad(OverflowAreaAddr, "argp.cur"), CGF.Int8Ty,
OverflowAreaAlign);
// Round up the address of the argument to its alignment.
CharUnits Align = CGF.getContext().getTypeAlignInChars(Ty);
if (Align > OverflowAreaAlign) {
llvm::Value *Ptr = OverflowArea.getPointer();
OverflowArea = Address(emitRoundPointerUpToAlignment(CGF, Ptr, Align),
OverflowArea.getElementType(), Align);
}
MemAddr = Builder.CreateElementBitCast(OverflowArea, DirectTy);
// Increase the overflow area.
OverflowArea = Builder.CreateConstInBoundsByteGEP(OverflowArea, Size);
Builder.CreateStore(OverflowArea.getPointer(), OverflowAreaAddr);
CGF.EmitBranch(Cont);
}
CGF.EmitBlock(Cont);
// Merge the cases with a phi.
Address Result = emitMergePHI(CGF, RegAddr, UsingRegs, MemAddr, UsingOverflow,
"vaarg.addr");
// Load the pointer if the argument was passed indirectly.
if (isIndirect) {
Result = Address(Builder.CreateLoad(Result, "aggr"), ElementTy,
getContext().getTypeAlignInChars(Ty));
}
return Result;
}
bool PPC32TargetCodeGenInfo::isStructReturnInRegABI(
const llvm::Triple &Triple, const CodeGenOptions &Opts) {
assert(Triple.isPPC32());
switch (Opts.getStructReturnConvention()) {
case CodeGenOptions::SRCK_Default:
break;
case CodeGenOptions::SRCK_OnStack: // -maix-struct-return
return false;
case CodeGenOptions::SRCK_InRegs: // -msvr4-struct-return
return true;
}
if (Triple.isOSBinFormatELF() && !Triple.isOSLinux())
return true;
return false;
}
bool
PPC32TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ false,
/*IsAIX*/ false);
}
// PowerPC-64
namespace {
/// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
class PPC64_SVR4_ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
ELFv1 = 0,
ELFv2
};
private:
static const unsigned GPRBits = 64;
ABIKind Kind;
bool IsSoftFloatABI;
public:
PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind,
bool SoftFloatABI)
: SwiftABIInfo(CGT), Kind(Kind), IsSoftFloatABI(SoftFloatABI) {}
bool isPromotableTypeForABI(QualType Ty) const;
CharUnits getParamTypeAlignment(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
// TODO: We can add more logic to computeInfo to improve performance.
// Example: For aggregate arguments that fit in a register, we could
// use getDirectInReg (as is done below for structs containing a single
// floating-point value) to avoid pushing them to memory on function
// entry. This would require changing the logic in PPCISelLowering
// when lowering the parameters in the caller and args in the callee.
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments()) {
// We rely on the default argument classification for the most part.
// One exception: An aggregate containing a single floating-point
// or vector item must be passed in a register if one is available.
const Type *T = isSingleElementStruct(I.type, getContext());
if (T) {
const BuiltinType *BT = T->getAs<BuiltinType>();
if ((T->isVectorType() && getContext().getTypeSize(T) == 128) ||
(BT && BT->isFloatingPoint())) {
QualType QT(T, 0);
I.info = ABIArgInfo::getDirectInReg(CGT.ConvertType(QT));
continue;
}
}
I.info = classifyArgumentType(I.type);
}
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return false;
}
};
class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
public:
PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
PPC64_SVR4_ABIInfo::ABIKind Kind,
bool SoftFloatABI)
: TargetCodeGenInfo(
std::make_unique<PPC64_SVR4_ABIInfo>(CGT, Kind, SoftFloatABI)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
class PPC64TargetCodeGenInfo : public DefaultTargetCodeGenInfo {
public:
PPC64TargetCodeGenInfo(CodeGenTypes &CGT) : DefaultTargetCodeGenInfo(CGT) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
// This is recovered from gcc output.
return 1; // r1 is the dedicated stack pointer
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
};
}
// Return true if the ABI requires Ty to be passed sign- or zero-
// extended to 64 bits.
bool
PPC64_SVR4_ABIInfo::isPromotableTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
if (isPromotableIntegerTypeForABI(Ty))
return true;
// In addition to the usual promotable integer types, we also need to
// extend all 32-bit types, since the ABI requires promotion to 64 bits.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Int:
case BuiltinType::UInt:
return true;
default:
break;
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < 64)
return true;
return false;
}
/// getParamTypeAlignment - Determine the alignment a type requires in the
/// parameter area. Always returns at least 8.
CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const {
// Complex types are passed just like their elements.
if (const ComplexType *CTy = Ty->getAs<ComplexType>())
Ty = CTy->getElementType();
auto FloatUsesVector = [this](QualType Ty){
return Ty->isRealFloatingType() && &getContext().getFloatTypeSemantics(
Ty) == &llvm::APFloat::IEEEquad();
};
// Only vector types of size 16 bytes need alignment (larger types are
// passed via reference, smaller types are not aligned).
if (Ty->isVectorType()) {
return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8);
} else if (FloatUsesVector(Ty)) {
// According to ABI document section 'Optional Save Areas': If extended
// precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION
// format are supported, map them to a single quadword, quadword aligned.
return CharUnits::fromQuantity(16);
}
// For single-element float/vector structs, we consider the whole type
// to have the same alignment requirements as its single element.
const Type *AlignAsType = nullptr;
const Type *EltType = isSingleElementStruct(Ty, getContext());
if (EltType) {
const BuiltinType *BT = EltType->getAs<BuiltinType>();
if ((EltType->isVectorType() && getContext().getTypeSize(EltType) == 128) ||
(BT && BT->isFloatingPoint()))
AlignAsType = EltType;
}
// Likewise for ELFv2 homogeneous aggregates.
const Type *Base = nullptr;
uint64_t Members = 0;
if (!AlignAsType && Kind == ELFv2 &&
isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
AlignAsType = Base;
// For these special-case aggregates, only vector base types need alignment.
if (AlignAsType) {
bool UsesVector = AlignAsType->isVectorType() ||
FloatUsesVector(QualType(AlignAsType, 0));
return CharUnits::fromQuantity(UsesVector ? 16 : 8);
}
// Otherwise, we only need alignment for any aggregate type that
// has an alignment requirement of >= 16 bytes.
if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128) {
return CharUnits::fromQuantity(16);
}
return CharUnits::fromQuantity(8);
}
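// Illustrative examples (assuming Altivec and IEEE-quad support): a 128-bit
// 'vector int' or an IEEE-quad '__float128' parameter gets 16-byte alignment
// in the parameter save area, as does an aggregate whose own alignment is
// 16 bytes or more; a plain 'double' falls through to the default 8-byte
// doubleword alignment.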
/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
/// aggregate. Base is set to the base element type, and Members is set
/// to the number of base elements.
bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
uint64_t &Members) const {
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
uint64_t NElements = AT->getSize().getZExtValue();
if (NElements == 0)
return false;
if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
return false;
Members *= NElements;
} else if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
Members = 0;
// If this is a C++ record, check the properties of the record such as
// bases and ABI specific restrictions
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
if (!getCXXABI().isPermittedToBeHomogeneousAggregate(CXXRD))
return false;
for (const auto &I : CXXRD->bases()) {
// Ignore empty records.
if (isEmptyRecord(getContext(), I.getType(), true))
continue;
uint64_t FldMembers;
if (!isHomogeneousAggregate(I.getType(), Base, FldMembers))
return false;
Members += FldMembers;
}
}
for (const auto *FD : RD->fields()) {
// Ignore (non-zero arrays of) empty records.
QualType FT = FD->getType();
while (const ConstantArrayType *AT =
getContext().getAsConstantArrayType(FT)) {
if (AT->getSize().getZExtValue() == 0)
return false;
FT = AT->getElementType();
}
if (isEmptyRecord(getContext(), FT, true))
continue;
if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() &&
FD->isZeroLengthBitField(getContext()))
continue;
uint64_t FldMembers;
if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
return false;
Members = (RD->isUnion() ?
std::max(Members, FldMembers) : Members + FldMembers);
}
if (!Base)
return false;
// Ensure there is no padding.
if (getContext().getTypeSize(Base) * Members !=
getContext().getTypeSize(Ty))
return false;
} else {
Members = 1;
if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
Members = 2;
Ty = CT->getElementType();
}
// Most ABIs only support float, double, and some vector type widths.
if (!isHomogeneousAggregateBaseType(Ty))
return false;
// The base type must be the same for all members. Types that
// agree in both total size and mode (float vs. vector) are
// treated as being equivalent here.
const Type *TyPtr = Ty.getTypePtr();
if (!Base) {
Base = TyPtr;
// If the vector has a non-power-of-2 element count, its total size has
// already been rounded up to a power of 2, so explicitly widen the base
// vector type to match that size.
if (const VectorType *VT = Base->getAs<VectorType>()) {
QualType EltTy = VT->getElementType();
unsigned NumElements =
getContext().getTypeSize(VT) / getContext().getTypeSize(EltTy);
Base = getContext()
.getVectorType(EltTy, NumElements, VT->getVectorKind())
.getTypePtr();
}
}
if (Base->isVectorType() != TyPtr->isVectorType() ||
getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
return false;
}
return Members > 0 && isHomogeneousAggregateSmallEnough(Base, Members);
}
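// Illustrative examples: 'struct { float a, b; float c[2]; }' is homogeneous
// with Base = float and Members = 4 (arrays multiply the member count into
// the total), while 'struct { float f; double d; }' is not, since the base
// types differ in size. A padding hole also disqualifies a candidate,
// because the size check above requires Members * sizeof(Base) == sizeof(Ty).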
bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for ELFv2 must have base types of float,
// double, long double, or 128-bit vectors.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
BT->getKind() == BuiltinType::LongDouble ||
BT->getKind() == BuiltinType::Ibm128 ||
(getContext().getTargetInfo().hasFloat128Type() &&
(BT->getKind() == BuiltinType::Float128))) {
if (IsSoftFloatABI)
return false;
return true;
}
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
if (getContext().getTypeSize(VT) == 128)
return true;
}
return false;
}
bool PPC64_SVR4_ABIInfo::isHomogeneousAggregateSmallEnough(
const Type *Base, uint64_t Members) const {
// Vector and fp128 types require one register, other floating point types
// require one or two registers depending on their size.
uint32_t NumRegs =
((getContext().getTargetInfo().hasFloat128Type() &&
Base->isFloat128Type()) ||
Base->isVectorType()) ? 1
: (getContext().getTypeSize(Base) + 63) / 64;
// Homogeneous Aggregates may occupy at most 8 registers.
return Members * NumRegs <= 8;
}
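// Illustrative example: up to eight 'float' or 'double' members qualify (one
// register each), but a 128-bit IBM 'long double' base type takes two
// registers, so at most four such members fit; an IEEE-quad '__float128', by
// contrast, counts as a single register, like a vector.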
ABIArgInfo
PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
// Non-Altivec vector types are passed in GPRs (smaller than 16 bytes)
// or via reference (larger than 16 bytes).
if (Ty->isVectorType()) {
uint64_t Size = getContext().getTypeSize(Ty);
if (Size > 128)
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
else if (Size < 128) {
llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
return ABIArgInfo::getDirect(CoerceTy);
}
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
if (isAggregateTypeForABI(Ty)) {
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
uint64_t ABIAlign = getParamTypeAlignment(Ty).getQuantity();
uint64_t TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
// ELFv2 homogeneous aggregates are passed as array types.
const Type *Base = nullptr;
uint64_t Members = 0;
if (Kind == ELFv2 &&
isHomogeneousAggregate(Ty, Base, Members)) {
llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
return ABIArgInfo::getDirect(CoerceTy);
}
// If an aggregate may end up fully in registers, we do not
// use the ByVal method, but pass the aggregate as an array.
// This is usually beneficial since we avoid forcing the
// back-end to store the argument to memory.
uint64_t Bits = getContext().getTypeSize(Ty);
if (Bits > 0 && Bits <= 8 * GPRBits) {
llvm::Type *CoerceTy;
// Types up to 8 bytes are passed as integer type (which will be
// properly aligned in the argument save area doubleword).
if (Bits <= GPRBits)
CoerceTy =
llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
// Larger types are passed as arrays, with the base type selected
// according to the required alignment in the save area.
else {
uint64_t RegBits = ABIAlign * 8;
uint64_t NumRegs = llvm::alignTo(Bits, RegBits) / RegBits;
llvm::Type *RegTy = llvm::IntegerType::get(getVMContext(), RegBits);
CoerceTy = llvm::ArrayType::get(RegTy, NumRegs);
}
return ABIArgInfo::getDirect(CoerceTy);
}
// All other aggregates are passed ByVal.
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
/*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
return (isPromotableTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
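// Illustrative examples of the coercions above: a 'struct { int a, b; }'
// (64 bits) is passed directly as an i64; a 24-byte struct of mixed integer
// fields with 8-byte ABI alignment becomes [3 x i64]; and under ELFv2 a
// homogeneous 'struct { double x, y; }' becomes [2 x double].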
ABIArgInfo
PPC64_SVR4_ABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect();
// Non-Altivec vector types are returned in GPRs (smaller than 16 bytes)
// or via reference (larger than 16 bytes).
if (RetTy->isVectorType()) {
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size > 128)
return getNaturalAlignIndirect(RetTy);
else if (Size < 128) {
llvm::Type *CoerceTy = llvm::IntegerType::get(getVMContext(), Size);
return ABIArgInfo::getDirect(CoerceTy);
}
}
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
if (isAggregateTypeForABI(RetTy)) {
// ELFv2 homogeneous aggregates are returned as array types.
const Type *Base = nullptr;
uint64_t Members = 0;
if (Kind == ELFv2 &&
isHomogeneousAggregate(RetTy, Base, Members)) {
llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
return ABIArgInfo::getDirect(CoerceTy);
}
// ELFv2 small aggregates are returned in up to two registers.
uint64_t Bits = getContext().getTypeSize(RetTy);
if (Kind == ELFv2 && Bits <= 2 * GPRBits) {
if (Bits == 0)
return ABIArgInfo::getIgnore();
llvm::Type *CoerceTy;
if (Bits > GPRBits) {
CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy);
} else
CoerceTy =
llvm::IntegerType::get(getVMContext(), llvm::alignTo(Bits, 8));
return ABIArgInfo::getDirect(CoerceTy);
}
// All other aggregates are returned indirectly.
return getNaturalAlignIndirect(RetTy);
}
return (isPromotableTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
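// Illustrative examples (assuming ELFv2): 'struct { int a, b, c; }' (96 bits)
// comes back in two GPRs as '{ i64, i64 }', a 'struct { char c; }' comes back
// as an i8, and any aggregate wider than two doublewords that is not a
// homogeneous aggregate is returned through an sret pointer.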
// Based on ARMABIInfo::EmitVAArg, adjusted for 64-bit machine.
Address PPC64_SVR4_ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
TypeInfo.Align = getParamTypeAlignment(Ty);
CharUnits SlotSize = CharUnits::fromQuantity(8);
// If we have a complex type and the base type is smaller than 8 bytes,
// the ABI calls for the real and imaginary parts to be right-adjusted
// in separate doublewords. However, Clang expects us to produce a
// pointer to a structure with the two parts packed tightly. So generate
// loads of the real and imaginary parts relative to the va_list pointer,
// and store them to a temporary structure.
if (const ComplexType *CTy = Ty->getAs<ComplexType>()) {
CharUnits EltSize = TypeInfo.Width / 2;
if (EltSize < SlotSize)
return complexTempStructure(CGF, VAListAddr, Ty, SlotSize, EltSize, CTy);
}
// Otherwise, just use the general rule.
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
TypeInfo, SlotSize, /*AllowHigher*/ true);
}
bool
PPC64_SVR4_TargetCodeGenInfo::initDwarfEHRegSizeTable(
CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
/*IsAIX*/ false);
}
bool
PPC64TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
return PPC_initDwarfEHRegSizeTable(CGF, Address, /*Is64Bit*/ true,
/*IsAIX*/ false);
}
//===----------------------------------------------------------------------===//
// AArch64 ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class AArch64ABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
AAPCS = 0,
DarwinPCS,
Win64
};
private:
ABIKind Kind;
public:
AArch64ABIInfo(CodeGenTypes &CGT, ABIKind Kind)
: SwiftABIInfo(CGT), Kind(Kind) {}
private:
ABIKind getABIKind() const { return Kind; }
bool isDarwinPCS() const { return Kind == DarwinPCS; }
ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
unsigned CallingConvention) const;
ABIArgInfo coerceIllegalVector(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
bool isIllegalVectorType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() =
classifyReturnType(FI.getReturnType(), FI.isVariadic());
for (auto &it : FI.arguments())
it.info = classifyArgumentType(it.type, FI.isVariadic(),
FI.getCallingConvention());
}
Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const;
Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
llvm::Type *BaseTy = CGF.ConvertType(Ty);
if (isa<llvm::ScalableVectorType>(BaseTy))
llvm::report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
return Kind == Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
: isDarwinPCS() ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
: EmitAAPCSVAArg(VAListAddr, Ty, CGF);
}
Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
unsigned elts) const override;
bool allowBFloatArgsAndRet() const override {
return getTarget().hasBFloat16Type();
}
};
class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind Kind)
: TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tfp, fp\t\t// marker for objc_retainAutoreleaseReturnValue";
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 31;
}
bool doesReturnSlotInterfereWithArgs() const override { return false; }
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
const auto *TA = FD->getAttr<TargetAttr>();
if (TA == nullptr)
return;
ParsedTargetAttr Attr = TA->parse();
if (Attr.BranchProtection.empty())
return;
TargetInfo::BranchProtectionInfo BPI;
StringRef Error;
(void)CGM.getTarget().validateBranchProtection(
Attr.BranchProtection, Attr.Architecture, BPI, Error);
assert(Error.empty());
auto *Fn = cast<llvm::Function>(GV);
static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
Fn->addFnAttr("sign-return-address", SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
if (BPI.SignReturnAddr != LangOptions::SignReturnAddressScopeKind::None) {
Fn->addFnAttr("sign-return-address-key",
BPI.SignKey == LangOptions::SignReturnAddressKeyKind::AKey
? "a_key"
: "b_key");
}
Fn->addFnAttr("branch-target-enforcement",
BPI.BranchTargetEnforcement ? "true" : "false");
}
bool isScalarizableAsmOperand(CodeGen::CodeGenFunction &CGF,
llvm::Type *Ty) const override {
if (CGF.getTarget().hasFeature("ls64")) {
auto *ST = dyn_cast<llvm::StructType>(Ty);
if (ST && ST->getNumElements() == 1) {
auto *AT = dyn_cast<llvm::ArrayType>(ST->getElementType(0));
if (AT && AT->getNumElements() == 8 &&
AT->getElementType()->isIntegerTy(64))
return true;
}
}
return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
}
};
class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
public:
WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIInfo::ABIKind K)
: AArch64TargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
AArch64TargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
addStackProbeTargetAttributes(D, GV, CGM);
}
}
ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
assert(Ty->isVectorType() && "expected vector type!");
const auto *VT = Ty->castAs<VectorType>();
if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
BuiltinType::UChar &&
"unexpected builtin type for SVE predicate!");
return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
llvm::Type::getInt1Ty(getVMContext()), 16));
}
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) {
assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
const auto *BT = VT->getElementType()->castAs<BuiltinType>();
llvm::ScalableVectorType *ResType = nullptr;
switch (BT->getKind()) {
default:
llvm_unreachable("unexpected builtin type for SVE vector!");
case BuiltinType::SChar:
case BuiltinType::UChar:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt8Ty(getVMContext()), 16);
break;
case BuiltinType::Short:
case BuiltinType::UShort:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt16Ty(getVMContext()), 8);
break;
case BuiltinType::Int:
case BuiltinType::UInt:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 4);
break;
case BuiltinType::Long:
case BuiltinType::ULong:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getInt64Ty(getVMContext()), 2);
break;
case BuiltinType::Half:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getHalfTy(getVMContext()), 8);
break;
case BuiltinType::Float:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getFloatTy(getVMContext()), 4);
break;
case BuiltinType::Double:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getDoubleTy(getVMContext()), 2);
break;
case BuiltinType::BFloat16:
ResType = llvm::ScalableVectorType::get(
llvm::Type::getBFloatTy(getVMContext()), 8);
break;
}
return ABIArgInfo::getDirect(ResType);
}
uint64_t Size = getContext().getTypeSize(Ty);
// Android promotes <2 x i8> to i16, not i32
if (isAndroid() && (Size <= 16)) {
llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
if (Size <= 32) {
llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
auto *ResType =
llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
auto *ResType =
llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
return ABIArgInfo::getDirect(ResType);
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
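// Illustrative examples: a '<2 x i8>' vector (16 bits) is coerced to i32 (or
// to i16 on Android, per the special case above); an illegal vector whose
// rounded size is 64 or 128 bits, such as a three-element vector, is rebuilt
// as '<2 x i32>' or '<4 x i32>'; anything larger is passed indirectly.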
ABIArgInfo
AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
unsigned CallingConvention) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
// Handle illegal vector types here.
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty);
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(Ty);
return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always indirect.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
CGCXXABI::RAA_DirectInMemory);
}
// Empty records are always ignored on Darwin, but actually passed in C++ mode
// elsewhere for GNU compatibility.
uint64_t Size = getContext().getTypeSize(Ty);
bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
if (IsEmpty || Size == 0) {
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
return ABIArgInfo::getIgnore();
// GNU C++ mode (the plain-C case returned above): the only argument that
// gets ignored is an empty one with size 0.
if (IsEmpty && Size == 0)
return ABIArgInfo::getIgnore();
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
}
// Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
const Type *Base = nullptr;
uint64_t Members = 0;
bool IsWin64 = Kind == Win64 || CallingConvention == llvm::CallingConv::Win64;
bool IsWinVariadic = IsWin64 && IsVariadic;
// In variadic functions on Windows, all composite types are treated alike,
// no special handling of HFAs/HVAs.
if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
if (Kind != AArch64ABIInfo::AAPCS)
return ABIArgInfo::getDirect(
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
// For alignment-adjusted HFAs, cap the argument alignment to 16; leave it
// at the default otherwise.
unsigned Align =
getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
return ABIArgInfo::getDirect(
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
nullptr, true, Align);
}
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
if (Size <= 128) {
// On RenderScript, coerce aggregates <= 16 bytes to an integer array of
// the same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
unsigned Alignment;
if (Kind == AArch64ABIInfo::AAPCS) {
Alignment = getContext().getTypeUnadjustedAlign(Ty);
Alignment = Alignment < 128 ? 64 : 128;
} else {
Alignment = std::max(getContext().getTypeAlign(Ty),
(unsigned)getTarget().getPointerWidth(0));
}
Size = llvm::alignTo(Size, Alignment);
// We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
llvm::Type *BaseTy = llvm::Type::getIntNTy(getVMContext(), Alignment);
return ABIArgInfo::getDirect(
Size == Alignment ? BaseTy
: llvm::ArrayType::get(BaseTy, Size / Alignment));
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
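// Illustrative examples under AAPCS: 'struct { int a, b, c; }' (12 bytes,
// 4-byte aligned) is rounded up and passed as [2 x i64]; a 16-byte-aligned
// 'struct { __int128 x; }' is passed as a single i128; and a 20-byte struct
// that is not a homogeneous aggregate exceeds the 16-byte limit and is
// passed indirectly.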
ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
bool IsVariadic) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (const auto *VT = RetTy->getAs<VectorType>()) {
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return coerceIllegalVector(RetTy);
}
// Large vector types should be returned via memory.
if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
return getNaturalAlignIndirect(RetTy);
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 128)
return getNaturalAlignIndirect(RetTy);
return (isPromotableIntegerTypeForABI(RetTy) && isDarwinPCS()
? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
uint64_t Size = getContext().getTypeSize(RetTy);
if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(RetTy, Base, Members) &&
!(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
IsVariadic))
// Homogeneous Floating-point Aggregates (HFAs) are returned directly.
return ABIArgInfo::getDirect();
// Aggregates <= 16 bytes are returned directly in registers or on the stack.
if (Size <= 128) {
// On RenderScript, coerce aggregates <= 16 bytes to an integer array of
// the same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
if (Size <= 64 && getDataLayout().isLittleEndian()) {
// Composite types are returned in the lower bits of a 64-bit register for
// LE, and in the higher bits for BE. However, integer types are always
// returned in the lower bits for both LE and BE, and they are not rounded
// up to 64 bits. We can skip rounding up of composite types for LE, but not
// for BE, otherwise composite types would be indistinguishable from integer
// types.
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), Size));
}
unsigned Alignment = getContext().getTypeAlign(RetTy);
Size = llvm::alignTo(Size, 64); // round up to multiple of 8 bytes
// We use a pair of i64 for a 16-byte aggregate with 8-byte alignment.
// For aggregates with 16-byte alignment, we use i128.
if (Alignment < 128 && Size == 128) {
llvm::Type *BaseTy = llvm::Type::getInt64Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
}
return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
}
return getNaturalAlignIndirect(RetTy);
}
/// isIllegalVectorType - check whether the vector type is legal for AArch64.
bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const {
if (const VectorType *VT = Ty->getAs<VectorType>()) {
// Check whether VT is a fixed-length SVE vector. These types are
// represented as scalable vectors in function args/return and must be
// coerced from fixed vectors.
if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector ||
VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
return true;
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
uint64_t Size = getContext().getTypeSize(VT);
// NumElements should be a power of 2.
if (!llvm::isPowerOf2_32(NumElements))
return true;
// arm64_32 has to be compatible with the ARM logic here, which allows huge
// vectors for some reason.
llvm::Triple Triple = getTarget().getTriple();
if (Triple.getArch() == llvm::Triple::aarch64_32 &&
Triple.isOSBinFormatMachO())
return Size <= 32;
return Size != 64 && (Size != 128 || NumElements == 1);
}
return false;
}
bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize,
llvm::Type *eltTy,
unsigned elts) const {
if (!llvm::isPowerOf2_32(elts))
return false;
if (totalSize.getQuantity() != 8 &&
(totalSize.getQuantity() != 16 || elts == 1))
return false;
return true;
}
bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS64 must have base types of a floating
// point type or a short-vector type. This is the same as the 32-bit ABI,
// but with the difference that any floating-point type is allowed,
// including __fp16.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->isFloatingPoint())
return true;
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
unsigned VecSize = getContext().getTypeSize(VT);
if (VecSize == 64 || VecSize == 128)
return true;
}
return false;
}
bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const {
return Members <= 4;
}
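// Illustrative example: 'struct { float x, y, z, w; }' is an HFA and travels
// in four consecutive FP/SIMD registers when they are available; adding a
// fifth float pushes Members past 4, so the struct falls back to the
// ordinary composite rules (and, at 20 bytes, is passed indirectly).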
bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
const {
// AAPCS64 says that the rule for whether something is a homogeneous
// aggregate is applied to the output of the data layout decision. So
// anything that doesn't affect the data layout also does not affect
// homogeneity. In particular, zero-length bitfields don't stop a struct
// being homogeneous.
return true;
}
Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
CGF.CurFnInfo->getCallingConvention());
bool IsIndirect = AI.isIndirect();
llvm::Type *BaseTy = CGF.ConvertType(Ty);
if (IsIndirect)
BaseTy = llvm::PointerType::getUnqual(BaseTy);
else if (AI.getCoerceToType())
BaseTy = AI.getCoerceToType();
unsigned NumRegs = 1;
if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
BaseTy = ArrTy->getElementType();
NumRegs = ArrTy->getNumElements();
}
bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
// The AArch64 va_list type and handling is specified in the Procedure Call
// Standard, section B.4:
//
// struct {
// void *__stack;
// void *__gr_top;
// void *__vr_top;
// int __gr_offs;
// int __vr_offs;
// };
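// For illustration (per AAPCS64 B.4; the exact values depend on how many
// argument registers the prologue saves): va_start typically initializes
// __gr_offs to -64 (8 GPRs x 8 bytes) and __vr_offs to -128 (8 SIMD
// registers x 16 bytes). The offsets count up towards zero, and a
// non-negative value means that register class is exhausted, so the
// argument is read from __stack instead.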
llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
CharUnits TySize = getContext().getTypeSizeInChars(Ty);
CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
Address reg_offs_p = Address::invalid();
llvm::Value *reg_offs = nullptr;
int reg_top_index;
int RegSize = IsIndirect ? 8 : TySize.getQuantity();
if (!IsFPR) {
// 3 is the field number of __gr_offs
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
reg_top_index = 1; // field number for __gr_top
RegSize = llvm::alignTo(RegSize, 8);
} else {
// 4 is the field number of __vr_offs.
reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
reg_top_index = 2; // field number for __vr_top
RegSize = 16 * NumRegs;
}
//=======================================
// Find out where argument was passed
//=======================================
// If reg_offs >= 0 we're already using the stack for this type of
// argument. We don't want to keep updating reg_offs (in case it overflows,
// though anyone passing 2GB of arguments, each at most 16 bytes, deserves
// whatever they get).
llvm::Value *UsingStack = nullptr;
UsingStack = CGF.Builder.CreateICmpSGE(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, 0));
CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, MaybeRegBlock);
// Otherwise, at least some kind of argument could go in these registers, the
// question is whether this particular type is too big.
CGF.EmitBlock(MaybeRegBlock);
// Integer arguments may need their register alignment corrected (for example
// a "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case
// we align __gr_offs before calculating the potential address.
if (!IsFPR && !IsIndirect && TyAlign.getQuantity() > 8) {
int Align = TyAlign.getQuantity();
reg_offs = CGF.Builder.CreateAdd(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, Align - 1),
"align_regoffs");
reg_offs = CGF.Builder.CreateAnd(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, -Align),
"aligned_regoffs");
}
// Update the gr_offs/vr_offs pointer for next call to va_arg on this va_list.
// The fact that this is done unconditionally reflects the fact that
// allocating an argument to the stack also uses up all the remaining
// registers of the appropriate kind.
llvm::Value *NewOffset = nullptr;
NewOffset = CGF.Builder.CreateAdd(
reg_offs, llvm::ConstantInt::get(CGF.Int32Ty, RegSize), "new_reg_offs");
CGF.Builder.CreateStore(NewOffset, reg_offs_p);
// Now we're in a position to decide whether this argument really was in
// registers or not.
llvm::Value *InRegs = nullptr;
InRegs = CGF.Builder.CreateICmpSLE(
NewOffset, llvm::ConstantInt::get(CGF.Int32Ty, 0), "inreg");
CGF.Builder.CreateCondBr(InRegs, InRegBlock, OnStackBlock);
//=======================================
// Argument was in registers
//=======================================
// Now we emit the code for if the argument was originally passed in
// registers. First start the appropriate block:
CGF.EmitBlock(InRegBlock);
llvm::Value *reg_top = nullptr;
Address reg_top_p =
CGF.Builder.CreateStructGEP(VAListAddr, reg_top_index, "reg_top_p");
reg_top = CGF.Builder.CreateLoad(reg_top_p, "reg_top");
Address BaseAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, reg_top, reg_offs),
CGF.Int8Ty, CharUnits::fromQuantity(IsFPR ? 16 : 8));
Address RegAddr = Address::invalid();
llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty), *ElementTy = MemTy;
if (IsIndirect) {
// If it's been passed indirectly (actually a struct), whatever we find from
// stored registers or on the stack will actually be a struct **.
MemTy = llvm::PointerType::getUnqual(MemTy);
}
const Type *Base = nullptr;
uint64_t NumMembers = 0;
bool IsHFA = isHomogeneousAggregate(Ty, Base, NumMembers);
if (IsHFA && NumMembers > 1) {
// Homogeneous aggregates passed in registers will have their elements split
// and stored 16 bytes apart regardless of size (they're notionally in qN,
// qN+1, ...). We reload and store into a temporary local variable
// contiguously.
assert(!IsIndirect && "Homogeneous aggregates should be passed directly");
auto BaseTyInfo = getContext().getTypeInfoInChars(QualType(Base, 0));
llvm::Type *BaseTy = CGF.ConvertType(QualType(Base, 0));
llvm::Type *HFATy = llvm::ArrayType::get(BaseTy, NumMembers);
Address Tmp = CGF.CreateTempAlloca(HFATy,
std::max(TyAlign, BaseTyInfo.Align));
// On big-endian platforms, the value will be right-aligned in its slot.
int Offset = 0;
if (CGF.CGM.getDataLayout().isBigEndian() &&
BaseTyInfo.Width.getQuantity() < 16)
Offset = 16 - BaseTyInfo.Width.getQuantity();
for (unsigned i = 0; i < NumMembers; ++i) {
CharUnits BaseOffset = CharUnits::fromQuantity(16 * i + Offset);
Address LoadAddr =
CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, BaseOffset);
LoadAddr = CGF.Builder.CreateElementBitCast(LoadAddr, BaseTy);
Address StoreAddr = CGF.Builder.CreateConstArrayGEP(Tmp, i);
llvm::Value *Elem = CGF.Builder.CreateLoad(LoadAddr);
CGF.Builder.CreateStore(Elem, StoreAddr);
}
RegAddr = CGF.Builder.CreateElementBitCast(Tmp, MemTy);
} else {
// Otherwise the object is contiguous in memory.
// It might be right-aligned in its slot.
CharUnits SlotSize = BaseAddr.getAlignment();
if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
(IsHFA || !isAggregateTypeForABI(Ty)) &&
TySize < SlotSize) {
CharUnits Offset = SlotSize - TySize;
BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
}
RegAddr = CGF.Builder.CreateElementBitCast(BaseAddr, MemTy);
}
CGF.EmitBranch(ContBlock);
//=======================================
// Argument was on the stack
//=======================================
CGF.EmitBlock(OnStackBlock);
Address stack_p = CGF.Builder.CreateStructGEP(VAListAddr, 0, "stack_p");
llvm::Value *OnStackPtr = CGF.Builder.CreateLoad(stack_p, "stack");
// Again, stack arguments may need realignment. In this case both integer and
// floating-point ones might be affected.
if (!IsIndirect && TyAlign.getQuantity() > 8) {
int Align = TyAlign.getQuantity();
OnStackPtr = CGF.Builder.CreatePtrToInt(OnStackPtr, CGF.Int64Ty);
OnStackPtr = CGF.Builder.CreateAdd(
OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, Align - 1),
"align_stack");
OnStackPtr = CGF.Builder.CreateAnd(
OnStackPtr, llvm::ConstantInt::get(CGF.Int64Ty, -Align),
"align_stack");
OnStackPtr = CGF.Builder.CreateIntToPtr(OnStackPtr, CGF.Int8PtrTy);
}
Address OnStackAddr = Address(OnStackPtr, CGF.Int8Ty,
std::max(CharUnits::fromQuantity(8), TyAlign));
// All stack slots are multiples of 8 bytes.
CharUnits StackSlotSize = CharUnits::fromQuantity(8);
CharUnits StackSize;
if (IsIndirect)
StackSize = StackSlotSize;
else
StackSize = TySize.alignTo(StackSlotSize);
llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
llvm::Value *NewStack = CGF.Builder.CreateInBoundsGEP(
CGF.Int8Ty, OnStackPtr, StackSizeC, "new_stack");
// Write the new value of __stack for the next call to va_arg
CGF.Builder.CreateStore(NewStack, stack_p);
if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
TySize < StackSlotSize) {
CharUnits Offset = StackSlotSize - TySize;
OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
}
OnStackAddr = CGF.Builder.CreateElementBitCast(OnStackAddr, MemTy);
CGF.EmitBranch(ContBlock);
//=======================================
// Tidy up
//=======================================
CGF.EmitBlock(ContBlock);
Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, OnStackAddr,
OnStackBlock, "vaargs.addr");
if (IsIndirect)
return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"), ElementTy,
TyAlign);
return ResAddr;
}
Address AArch64ABIInfo::EmitDarwinVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
// The backend's lowering doesn't support va_arg for aggregates or
// illegal vector types. Lower VAArg here for these cases and use
// the LLVM va_arg instruction for everything else.
if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty))
return EmitVAArgInstr(CGF, VAListAddr, Ty, ABIArgInfo::getDirect());
uint64_t PointerSize = getTarget().getPointerWidth(0) / 8;
CharUnits SlotSize = CharUnits::fromQuantity(PointerSize);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr, "ap.cur"),
getVAListElementType(CGF), SlotSize);
Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
return Addr;
}
// The size of the actual thing passed, which might end up just
// being a pointer for indirect types.
auto TyInfo = getContext().getTypeInfoInChars(Ty);
// Arguments bigger than 16 bytes which aren't homogeneous
// aggregates should be passed indirectly.
bool IsIndirect = false;
if (TyInfo.Width.getQuantity() > 16) {
const Type *Base = nullptr;
uint64_t Members = 0;
IsIndirect = !isHomogeneousAggregate(Ty, Base, Members);
}
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
TyInfo, SlotSize, /*AllowHigherAlign*/ true);
}
Address AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
bool IsIndirect = false;
// Composites larger than 16 bytes are passed by reference.
if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
IsIndirect = true;
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(8),
/*allowHigherAlign*/ false);
}
//===----------------------------------------------------------------------===//
// ARM ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class ARMABIInfo : public SwiftABIInfo {
public:
enum ABIKind {
APCS = 0,
AAPCS = 1,
AAPCS_VFP = 2,
AAPCS16_VFP = 3,
};
private:
ABIKind Kind;
bool IsFloatABISoftFP;
public:
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind)
: SwiftABIInfo(CGT), Kind(_Kind) {
setCCs();
IsFloatABISoftFP = CGT.getCodeGenOpts().FloatABI == "softfp" ||
CGT.getCodeGenOpts().FloatABI == ""; // default
}
bool isEABI() const {
switch (getTarget().getTriple().getEnvironment()) {
case llvm::Triple::Android:
case llvm::Triple::EABI:
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABI:
case llvm::Triple::GNUEABIHF:
case llvm::Triple::MuslEABI:
case llvm::Triple::MuslEABIHF:
return true;
default:
return false;
}
}
bool isEABIHF() const {
switch (getTarget().getTriple().getEnvironment()) {
case llvm::Triple::EABIHF:
case llvm::Triple::GNUEABIHF:
case llvm::Triple::MuslEABIHF:
return true;
default:
return false;
}
}
ABIKind getABIKind() const { return Kind; }
bool allowBFloatArgsAndRet() const override {
return !IsFloatABISoftFP && getTarget().hasBFloat16Type();
}
private:
ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const;
ABIArgInfo classifyHomogeneousAggregate(QualType Ty, const Type *Base,
uint64_t Members) const;
ABIArgInfo coerceIllegalVector(QualType Ty) const;
bool isIllegalVectorType(QualType Ty) const;
bool containsAnyFP16Vectors(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Ty,
uint64_t Members) const override;
bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
llvm::CallingConv::ID getLLVMDefaultCC() const;
llvm::CallingConv::ID getABIDefaultCC() const;
void setCCs();
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return true;
}
bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy,
unsigned elts) const override;
};
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
: TargetCodeGenInfo(std::make_unique<ARMABIInfo>(CGT, K)) {}
const ARMABIInfo &getABIInfo() const {
return static_cast<const ARMABIInfo&>(TargetCodeGenInfo::getABIInfo());
}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 13;
}
StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
return "mov\tr7, r7\t\t// marker for objc_retainAutoreleaseReturnValue";
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-15 are the 16 integer registers.
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 15);
return false;
}
unsigned getSizeOfUnwindException() const override {
if (getABIInfo().isEABI()) return 88;
return TargetCodeGenInfo::getSizeOfUnwindException();
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
auto *Fn = cast<llvm::Function>(GV);
if (const auto *TA = FD->getAttr<TargetAttr>()) {
ParsedTargetAttr Attr = TA->parse();
if (!Attr.BranchProtection.empty()) {
TargetInfo::BranchProtectionInfo BPI;
StringRef DiagMsg;
StringRef Arch = Attr.Architecture.empty()
? CGM.getTarget().getTargetOpts().CPU
: Attr.Architecture;
if (!CGM.getTarget().validateBranchProtection(Attr.BranchProtection,
Arch, BPI, DiagMsg)) {
CGM.getDiags().Report(
D->getLocation(),
diag::warn_target_unsupported_branch_protection_attribute)
<< Arch;
} else {
static const char *SignReturnAddrStr[] = {"none", "non-leaf", "all"};
assert(static_cast<unsigned>(BPI.SignReturnAddr) <= 2 &&
"Unexpected SignReturnAddressScopeKind");
Fn->addFnAttr(
"sign-return-address",
SignReturnAddrStr[static_cast<int>(BPI.SignReturnAddr)]);
Fn->addFnAttr("branch-target-enforcement",
BPI.BranchTargetEnforcement ? "true" : "false");
}
} else if (CGM.getLangOpts().BranchTargetEnforcement ||
CGM.getLangOpts().hasSignReturnAddress()) {
// If the Branch Protection attribute is missing, validate the target
// Architecture attribute against Branch Protection command line
// settings.
if (!CGM.getTarget().isBranchProtectionSupportedArch(Attr.Architecture))
CGM.getDiags().Report(
D->getLocation(),
diag::warn_target_unsupported_branch_protection_attribute)
<< Attr.Architecture;
}
}
const ARMInterruptAttr *Attr = FD->getAttr<ARMInterruptAttr>();
if (!Attr)
return;
const char *Kind;
switch (Attr->getInterrupt()) {
case ARMInterruptAttr::Generic: Kind = ""; break;
case ARMInterruptAttr::IRQ: Kind = "IRQ"; break;
case ARMInterruptAttr::FIQ: Kind = "FIQ"; break;
case ARMInterruptAttr::SWI: Kind = "SWI"; break;
case ARMInterruptAttr::ABORT: Kind = "ABORT"; break;
case ARMInterruptAttr::UNDEF: Kind = "UNDEF"; break;
}
Fn->addFnAttr("interrupt", Kind);
ARMABIInfo::ABIKind ABI = cast<ARMABIInfo>(getABIInfo()).getABIKind();
if (ABI == ARMABIInfo::APCS)
return;
// AAPCS guarantees that sp will be 8-byte aligned on any public interface,
// however this is not necessarily true on taking any interrupt. Instruct
// the backend to perform a realignment as part of the function prologue.
llvm::AttrBuilder B(Fn->getContext());
B.addStackAlignmentAttr(8);
Fn->addFnAttrs(B);
}
};
class WindowsARMTargetCodeGenInfo : public ARMTargetCodeGenInfo {
public:
WindowsARMTargetCodeGenInfo(CodeGenTypes &CGT, ARMABIInfo::ABIKind K)
: ARMTargetCodeGenInfo(CGT, K) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
void getDependentLibraryOption(llvm::StringRef Lib,
llvm::SmallString<24> &Opt) const override {
Opt = "/DEFAULTLIB:" + qualifyWindowsLibrary(Lib);
}
void getDetectMismatchOption(llvm::StringRef Name, llvm::StringRef Value,
llvm::SmallString<32> &Opt) const override {
Opt = "/FAILIFMISMATCH:\"" + Name.str() + "=" + Value.str() + "\"";
}
};
void WindowsARMTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const {
ARMTargetCodeGenInfo::setTargetAttributes(D, GV, CGM);
if (GV->isDeclaration())
return;
addStackProbeTargetAttributes(D, GV, CGM);
}
}
void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!::classifyReturnType(getCXXABI(), FI, *this))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(),
FI.getCallingConvention());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, FI.isVariadic(),
FI.getCallingConvention());
// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
return;
llvm::CallingConv::ID cc = getRuntimeCC();
if (cc != llvm::CallingConv::C)
FI.setEffectiveCallingConvention(cc);
}
/// Return the default calling convention that LLVM will use.
llvm::CallingConv::ID ARMABIInfo::getLLVMDefaultCC() const {
// The default calling convention that LLVM will infer.
if (isEABIHF() || getTarget().getTriple().isWatchABI())
return llvm::CallingConv::ARM_AAPCS_VFP;
else if (isEABI())
return llvm::CallingConv::ARM_AAPCS;
else
return llvm::CallingConv::ARM_APCS;
}
/// Return the calling convention that our ABI would like us to use
/// as the C calling convention.
llvm::CallingConv::ID ARMABIInfo::getABIDefaultCC() const {
switch (getABIKind()) {
case APCS: return llvm::CallingConv::ARM_APCS;
case AAPCS: return llvm::CallingConv::ARM_AAPCS;
case AAPCS_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
case AAPCS16_VFP: return llvm::CallingConv::ARM_AAPCS_VFP;
}
llvm_unreachable("bad ABI kind");
}
void ARMABIInfo::setCCs() {
assert(getRuntimeCC() == llvm::CallingConv::C);
// Don't muddy up the IR with a ton of explicit annotations if
// they'd just match what LLVM will infer from the triple.
llvm::CallingConv::ID abiCC = getABIDefaultCC();
if (abiCC != getLLVMDefaultCC())
RuntimeCC = abiCC;
}
ABIArgInfo ARMABIInfo::coerceIllegalVector(QualType Ty) const {
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 32) {
llvm::Type *ResType =
llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64 || Size == 128) {
auto *ResType = llvm::FixedVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), Size / 32);
return ABIArgInfo::getDirect(ResType);
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
ABIArgInfo ARMABIInfo::classifyHomogeneousAggregate(QualType Ty,
const Type *Base,
uint64_t Members) const {
assert(Base && "Base class should be set for homogeneous aggregate");
// Base can be a floating-point or a vector.
if (const VectorType *VT = Base->getAs<VectorType>()) {
// FP16 vectors should be converted to integer vectors
if (!getTarget().hasLegalHalfType() && containsAnyFP16Vectors(Ty)) {
uint64_t Size = getContext().getTypeSize(VT);
auto *NewVecTy = llvm::FixedVectorType::get(
llvm::Type::getInt32Ty(getVMContext()), Size / 32);
llvm::Type *Ty = llvm::ArrayType::get(NewVecTy, Members);
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
}
}
unsigned Align = 0;
if (getABIKind() == ARMABIInfo::AAPCS ||
getABIKind() == ARMABIInfo::AAPCS_VFP) {
// For alignment-adjusted HFAs, cap the argument alignment to 8; leave it
// at the default otherwise.
Align = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
Align = (Align > BaseAlign && Align >= 8) ? 8 : 0;
}
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false, Align);
}
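// Illustrative example: on a target without native half support, a
// homogeneous aggregate of two 64-bit '<4 x __fp16>' members is rewritten as
// [2 x <2 x i32>] so the ABI does not depend on fp16 hardware; otherwise the
// aggregate is passed directly, to land in consecutive VFP registers.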
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
unsigned functionCallConv) const {
// 6.1.2.1 The following argument types are VFP CPRCs:
// A single-precision floating-point type (including promoted
// half-precision types); A double-precision floating-point type;
// A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
// with a Base Type of a single- or double-precision floating-point type,
// 64-bit containerized vectors or 128-bit containerized vectors with one
// to four Elements.
// Variadic functions should always marshal to the base standard.
bool IsAAPCS_VFP =
!isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ false);
Ty = useFirstFieldIfTransparentUnion(Ty);
// Handle illegal vector types here.
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty);
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
Ty = EnumTy->getDecl()->getIntegerType();
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
}
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
if (IsAAPCS_VFP) {
// Homogeneous Aggregates need to be expanded when we can fit the aggregate
// into VFP registers.
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(Ty, Base, Members))
return classifyHomogeneousAggregate(Ty, Base, Members);
} else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) {
// WatchOS does have homogeneous aggregates. Note that we intentionally use
// this convention even for a variadic function: the backend will use GPRs
// if needed.
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(Ty, Base, Members)) {
assert(Base && Members <= 4 && "unexpected homogeneous aggregate");
llvm::Type *Ty =
llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members);
return ABIArgInfo::getDirect(Ty, 0, nullptr, false);
}
}
if (getABIKind() == ARMABIInfo::AAPCS16_VFP &&
getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(16)) {
// WatchOS is adopting the 64-bit AAPCS rule on composite types: if they're
// bigger than 128-bits, they get placed in space allocated by the caller,
// and a pointer is passed.
return ABIArgInfo::getIndirect(
CharUnits::fromQuantity(getContext().getTypeAlign(Ty) / 8), false);
}
// Support byval for ARM.
// The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at
// most 8-byte. We realign the indirect argument if type alignment is bigger
// than ABI alignment.
uint64_t ABIAlign = 4;
uint64_t TyAlign;
if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
getABIKind() == ARMABIInfo::AAPCS) {
TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
} else {
TyAlign = getContext().getTypeAlignInChars(Ty).getQuantity();
}
if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
assert(getABIKind() != ARMABIInfo::AAPCS16_VFP && "unexpected byval");
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(ABIAlign),
/*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
// On RenderScript, coerce aggregates <= 64 bytes to an integer array of
// the same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(Ty, getContext(), getVMContext());
}
// Otherwise, pass by coercing to a structure of the appropriate size.
llvm::Type* ElemTy;
unsigned SizeRegs;
// FIXME: Try to match the types of the arguments more accurately where
// we can.
if (TyAlign <= 4) {
ElemTy = llvm::Type::getInt32Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
} else {
ElemTy = llvm::Type::getInt64Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
}
return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs));
}
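// Illustrative examples of the coercions above: a 'struct { char c[6]; }'
// (alignment <= 4) is coerced to [2 x i32]; an 8-byte-aligned
// 'struct { double d; int i; }' (16 bytes) becomes [2 x i64]; and anything
// over 64 bytes is passed byval as described earlier.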
static bool isIntegerLikeType(QualType Ty, ASTContext &Context,
llvm::LLVMContext &VMContext) {
// APCS, C Language Calling Conventions, Non-Simple Return Values: A structure
// is called integer-like if its size is less than or equal to one word, and
// the offset of each of its addressable sub-fields is zero.
uint64_t Size = Context.getTypeSize(Ty);
// Check that the type fits in a word.
if (Size > 32)
return false;
// FIXME: Handle vector types!
if (Ty->isVectorType())
return false;
// Float types are never treated as "integer like".
if (Ty->isRealFloatingType())
return false;
// If this is a builtin or pointer type then it is ok.
if (Ty->getAs<BuiltinType>() || Ty->isPointerType())
return true;
// Small complex integer types are "integer like".
if (const ComplexType *CT = Ty->getAs<ComplexType>())
return isIntegerLikeType(CT->getElementType(), Context, VMContext);
// Single-element and zero-sized arrays should be allowed by the definition
// above, but they are not.
// Otherwise, it must be a record type.
const RecordType *RT = Ty->getAs<RecordType>();
if (!RT) return false;
// Ignore records with flexible arrays.
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return false;
// Check that all sub-fields are at offset 0, and are themselves "integer
// like".
const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD);
bool HadField = false;
unsigned idx = 0;
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
const FieldDecl *FD = *i;
// Bit-fields are not addressable, we only need to verify they are "integer
// like". We still have to disallow a subsequent non-bitfield, for example:
// struct { int : 0; int x; }
// is not integer-like according to gcc.
if (FD->isBitField()) {
if (!RD->isUnion())
HadField = true;
if (!isIntegerLikeType(FD->getType(), Context, VMContext))
return false;
continue;
}
// Check if this field is at offset 0.
if (Layout.getFieldOffset(idx) != 0)
return false;
if (!isIntegerLikeType(FD->getType(), Context, VMContext))
return false;
// Only allow at most one field in a structure. This doesn't match the
// wording above, but follows gcc in situations with a field following an
// empty structure.
if (!RD->isUnion()) {
if (HadField)
return false;
HadField = true;
}
}
return true;
}
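// Illustrative examples: 'struct { short s; }' and 'union { int i; char c; }'
// are integer-like (every addressable field sits at offset 0 within a single
// word), but 'struct { char a; char b; }' is not, because 'b' lives at a
// non-zero offset.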
ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
unsigned functionCallConv) const {
// Variadic functions should always marshal to the base standard.
bool IsAAPCS_VFP =
!isVariadic && isEffectivelyAAPCS_VFP(functionCallConv, /* AAPCS16 */ true);
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (const VectorType *VT = RetTy->getAs<VectorType>()) {
// Large vector types should be returned via memory.
if (getContext().getTypeSize(RetTy) > 128)
return getNaturalAlignIndirect(RetTy);
// TODO: FP16/BF16 vectors should be converted to integer vectors
// This check is similar to isIllegalVectorType - refactor?
if ((!getTarget().hasLegalHalfType() &&
(VT->getElementType()->isFloat16Type() ||
VT->getElementType()->isHalfType())) ||
(IsFloatABISoftFP &&
VT->getElementType()->isBFloat16Type()))
return coerceIllegalVector(RetTy);
}
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect();
}
// Are we following APCS?
if (getABIKind() == APCS) {
if (isEmptyRecord(getContext(), RetTy, false))
return ABIArgInfo::getIgnore();
// Complex types are all returned as packed integers.
//
// FIXME: Consider using 2 x vector types if the back end handles them
// correctly.
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect(llvm::IntegerType::get(
getVMContext(), getContext().getTypeSize(RetTy)));
// Integer like structures are returned in r0.
if (isIntegerLikeType(RetTy, getContext(), getVMContext())) {
// Return in the smallest viable integer type.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 8)
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
}
// Otherwise return in memory.
return getNaturalAlignIndirect(RetTy);
}
// Otherwise this is an AAPCS variant.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Check for homogeneous aggregates with AAPCS-VFP.
if (IsAAPCS_VFP) {
const Type *Base = nullptr;
uint64_t Members = 0;
if (isHomogeneousAggregate(RetTy, Base, Members))
return classifyHomogeneousAggregate(RetTy, Base, Members);
}
// Aggregates <= 4 bytes are returned in r0; other aggregates
// are returned indirectly.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 32) {
// On RenderScript, coerce aggregates <= 4 bytes to an integer array of the
// same size and alignment.
if (getTarget().isRenderScriptTarget()) {
return coerceToIntArray(RetTy, getContext(), getVMContext());
}
if (getDataLayout().isBigEndian())
// Return in a 32-bit integer type (as if loaded by LDR, AAPCS 5.4)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
// Return in the smallest viable integer type.
if (Size <= 8)
return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
} else if (Size <= 128 && getABIKind() == AAPCS16_VFP) {
llvm::Type *Int32Ty = llvm::Type::getInt32Ty(getVMContext());
llvm::Type *CoerceTy =
llvm::ArrayType::get(Int32Ty, llvm::alignTo(Size, 32) / 32);
return ABIArgInfo::getDirect(CoerceTy);
}
return getNaturalAlignIndirect(RetTy);
}
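// Rough illustration (hypothetical types, little-endian AAPCS) of the return
// classification above:
//   struct { char c; }        -> returned directly as i8
//   struct { char a, b, c; }  -> returned directly as i32
//   struct { float x, y; }    -> homogeneous aggregate; returned in VFP
//                                registers under AAPCS-VFP, otherwise indirectly
//   struct { int a, b; }      -> 8 bytes, returned indirectly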
/// isIllegalVectorType - check whether Ty is an illegal vector type.
bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
if (const VectorType *VT = Ty->getAs<VectorType> ()) {
// On targets that don't support half, fp16 or bfloat, they are expanded
// into float, and we don't want the ABI to depend on whether or not they
// are supported in hardware. Thus return false to coerce vectors of these
// types into integer vectors.
// We do not depend on hasLegalHalfType for bfloat as it is a
// separate IR type.
if ((!getTarget().hasLegalHalfType() &&
(VT->getElementType()->isFloat16Type() ||
VT->getElementType()->isHalfType())) ||
(IsFloatABISoftFP &&
VT->getElementType()->isBFloat16Type()))
return true;
if (isAndroid()) {
// Android shipped using Clang 3.1, which supported a slightly different
// vector ABI. The primary differences were that 3-element vector types
// were legal, and so were sub 32-bit vectors (i.e. <2 x i8>). This path
// accepts that legacy behavior for Android only.
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
// NumElements should be power of 2 or equal to 3.
if (!llvm::isPowerOf2_32(NumElements) && NumElements != 3)
return true;
} else {
// Check whether VT is legal.
unsigned NumElements = VT->getNumElements();
uint64_t Size = getContext().getTypeSize(VT);
// NumElements should be power of 2.
if (!llvm::isPowerOf2_32(NumElements))
return true;
// The size should be greater than 32 bits; smaller vectors are illegal.
return Size <= 32;
}
}
return false;
}
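// For illustration, under the rules above (non-Android):
//   <4 x i32>  (16 bytes)  -> legal
//   <2 x i8>   (2 bytes)   -> illegal: 32 bits or smaller
//   <3 x float>            -> illegal: element count is not a power of 2
// On Android both of the latter are accepted for backwards compatibility.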
/// Return true if a type contains any 16-bit floating point vectors
bool ARMABIInfo::containsAnyFP16Vectors(QualType Ty) const {
if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
uint64_t NElements = AT->getSize().getZExtValue();
if (NElements == 0)
return false;
return containsAnyFP16Vectors(AT->getElementType());
} else if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
if (llvm::any_of(CXXRD->bases(), [this](const CXXBaseSpecifier &B) {
return containsAnyFP16Vectors(B.getType());
}))
return true;
if (llvm::any_of(RD->fields(), [this](FieldDecl *FD) {
return FD && containsAnyFP16Vectors(FD->getType());
}))
return true;
return false;
} else {
if (const VectorType *VT = Ty->getAs<VectorType>())
return (VT->getElementType()->isFloat16Type() ||
VT->getElementType()->isBFloat16Type() ||
VT->getElementType()->isHalfType());
return false;
}
}
bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize,
llvm::Type *eltTy,
unsigned numElts) const {
if (!llvm::isPowerOf2_32(numElts))
return false;
unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy);
if (size > 64)
return false;
if (vectorSize.getQuantity() != 8 &&
(vectorSize.getQuantity() != 16 || numElts == 1))
return false;
return true;
}
bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
// Homogeneous aggregates for AAPCS-VFP must have base types of float,
// double, or 64-bit or 128-bit vectors.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Float ||
BT->getKind() == BuiltinType::Double ||
BT->getKind() == BuiltinType::LongDouble)
return true;
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
unsigned VecSize = getContext().getTypeSize(VT);
if (VecSize == 64 || VecSize == 128)
return true;
}
return false;
}
bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const {
return Members <= 4;
}
bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
// AAPCS32 says that the rule for whether something is a homogeneous
// aggregate is applied to the output of the data layout decision. So
// anything that doesn't affect the data layout also does not affect
// homogeneity. In particular, zero-length bitfields don't stop a struct
// being homogeneous.
return true;
}
bool ARMABIInfo::isEffectivelyAAPCS_VFP(unsigned callConvention,
bool acceptHalf) const {
// Give precedence to user-specified calling conventions.
if (callConvention != llvm::CallingConv::C)
return (callConvention == llvm::CallingConv::ARM_AAPCS_VFP);
else
return (getABIKind() == AAPCS_VFP) ||
(acceptHalf && (getABIKind() == AAPCS16_VFP));
}
Address ARMABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CharUnits SlotSize = CharUnits::fromQuantity(4);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
- Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
- getVAListElementType(CGF), SlotSize);
- Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
- return Addr;
+ VAListAddr = CGF.Builder.CreateElementBitCast(VAListAddr, CGF.Int8PtrTy);
+ auto *Load = CGF.Builder.CreateLoad(VAListAddr);
+ Address Addr = Address(Load, CGF.Int8Ty, SlotSize);
+ return CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
}
CharUnits TySize = getContext().getTypeSizeInChars(Ty);
CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty);
// Use indirect if size of the illegal vector is bigger than 16 bytes.
bool IsIndirect = false;
const Type *Base = nullptr;
uint64_t Members = 0;
if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
IsIndirect = true;
// ARMv7k passes structs bigger than 16 bytes indirectly, in space
// allocated by the caller.
} else if (TySize > CharUnits::fromQuantity(16) &&
getABIKind() == ARMABIInfo::AAPCS16_VFP &&
!isHomogeneousAggregate(Ty, Base, Members)) {
IsIndirect = true;
// Otherwise, bound the type's ABI alignment.
// The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
// APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
// Our callers should be prepared to handle an under-aligned address.
} else if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
getABIKind() == ARMABIInfo::AAPCS) {
TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(8));
} else if (getABIKind() == ARMABIInfo::AAPCS16_VFP) {
// ARMv7k allows type alignment up to 16 bytes.
TyAlignForABI = std::max(TyAlignForABI, CharUnits::fromQuantity(4));
TyAlignForABI = std::min(TyAlignForABI, CharUnits::fromQuantity(16));
} else {
TyAlignForABI = CharUnits::fromQuantity(4);
}
TypeInfoChars TyInfo(TySize, TyAlignForABI, AlignRequirementKind::None);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo,
SlotSize, /*AllowHigherAlign*/ true);
}
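// Illustrative consequence of the clamping above (hypothetical type): under
// AAPCS, an over-aligned 16-byte-aligned struct is treated as 8-byte aligned
// for va_arg purposes, while under AAPCS16_VFP (ARMv7k) the full 16-byte
// alignment is honoured.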
//===----------------------------------------------------------------------===//
// NVPTX ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class NVPTXTargetCodeGenInfo;
class NVPTXABIInfo : public ABIInfo {
NVPTXTargetCodeGenInfo &CGInfo;
public:
NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
: ABIInfo(CGT), CGInfo(Info) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType Ty) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool isUnsupportedType(QualType T) const;
ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
};
class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
public:
NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
bool shouldEmitStaticExternCAliases() const override;
llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
// On the device side, a surface reference is represented as an object handle
// in a 64-bit integer.
return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
}
llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
// On the device side, a texture reference is represented as an object handle
// in a 64-bit integer.
return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
}
bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) const override {
emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
return true;
}
bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) const override {
emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
return true;
}
private:
// Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
// resulting MDNode to the nvvm.annotations MDNode.
static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
int Operand);
static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
LValue Src) {
llvm::Value *Handle = nullptr;
llvm::Constant *C =
llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
// Lookup `addrspacecast` through the constant pointer if any.
if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
// Load the handle from the specific global variable using
// `nvvm.texsurf.handle.internal` intrinsic.
Handle = CGF.EmitRuntimeCall(
CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
{GV->getType()}),
{GV}, "texsurf_handle");
} else
Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
CGF.EmitStoreOfScalar(Handle, Dst);
}
};
/// Checks if the type is unsupported directly by the current target.
bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
ASTContext &Context = getContext();
if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
return true;
if (!Context.getTargetInfo().hasFloat128Type() &&
(T->isFloat128Type() ||
(T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
return true;
if (const auto *EIT = T->getAs<BitIntType>())
return EIT->getNumBits() >
(Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
Context.getTypeSize(T) > 64U)
return true;
if (const auto *AT = T->getAsArrayTypeUnsafe())
return isUnsupportedType(AT->getElementType());
const auto *RT = T->getAs<RecordType>();
if (!RT)
return false;
const RecordDecl *RD = RT->getDecl();
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const CXXBaseSpecifier &I : CXXRD->bases())
if (isUnsupportedType(I.getType()))
return true;
for (const FieldDecl *I : RD->fields())
if (isUnsupportedType(I->getType()))
return true;
return false;
}
/// Coerce the given type into an array whose element size does not exceed
/// MaxSize bits.
ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
unsigned MaxSize) const {
// Alignment and Size are measured in bits.
const uint64_t Size = getContext().getTypeSize(Ty);
const uint64_t Alignment = getContext().getTypeAlign(Ty);
const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
const uint64_t NumElements = (Size + Div - 1) / Div;
return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
}
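// Hypothetical example of the coercion above: a 128-bit struct with 32-bit
// alignment and MaxSize == 64 is coerced to [4 x i32]
// (Div == min(64, 32) == 32, NumElements == (128 + 31) / 32 == 4).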
ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (getContext().getLangOpts().OpenMP &&
getContext().getLangOpts().OpenMPIsDevice && isUnsupportedType(RetTy))
return coerceToIntArrayWithLimit(RetTy, 64);
// Note: this differs from the default ABI.
if (!RetTy->isScalarType())
return ABIArgInfo::getDirect();
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Return aggregate types as indirect by value
if (isAggregateTypeForABI(Ty)) {
// Under CUDA device compilation, tex/surf builtin types are replaced with
// object types and passed directly.
if (getContext().getLangOpts().CUDAIsDevice) {
if (Ty->isCUDADeviceBuiltinSurfaceType())
return ABIArgInfo::getDirect(
CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
if (Ty->isCUDADeviceBuiltinTextureType())
return ABIArgInfo::getDirect(
CGInfo.getCUDADeviceBuiltinTextureDeviceType());
}
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if ((EIT->getNumBits() > 128) ||
(!getContext().getTargetInfo().hasInt128Type() &&
EIT->getNumBits() > 64))
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect());
}
void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
return;
FI.setEffectiveCallingConvention(getRuntimeCC());
}
Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
llvm_unreachable("NVPTX does not support varargs");
}
void NVPTXTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
if (VD) {
if (M.getLangOpts().CUDA) {
if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
addNVVMMetadata(GV, "surface", 1);
else if (VD->getType()->isCUDADeviceBuiltinTextureType())
addNVVMMetadata(GV, "texture", 1);
return;
}
}
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *F = cast<llvm::Function>(GV);
// Perform special handling in OpenCL mode
if (M.getLangOpts().OpenCL) {
// Use OpenCL function attributes to check for kernel functions
// By default, all functions are device functions
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL __kernel functions get kernel metadata
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
// And kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
}
}
// Perform special handling in CUDA mode.
if (M.getLangOpts().CUDA) {
// CUDA __global__ functions get a kernel metadata entry. Since
// __global__ functions cannot be called from the device, we do not
// need to set the noinline attribute.
if (FD->hasAttr<CUDAGlobalAttr>()) {
// Create !{<func-ref>, metadata !"kernel", i32 1} node
addNVVMMetadata(F, "kernel", 1);
}
if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
llvm::APSInt MaxThreads(32);
MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
if (MaxThreads > 0)
addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
// min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
// not specified in __launch_bounds__ or if the user specified a 0 value,
// we don't have to add a PTX directive.
if (Attr->getMinBlocks()) {
llvm::APSInt MinBlocks(32);
MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
if (MinBlocks > 0)
// Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
}
}
}
}
void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
StringRef Name, int Operand) {
llvm::Module *M = GV->getParent();
llvm::LLVMContext &Ctx = M->getContext();
// Get "nvvm.annotations" metadata node
llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
llvm::Metadata *MDVals[] = {
llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
llvm::ConstantAsMetadata::get(
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
// Append metadata to nvvm.annotations
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
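// For example, annotating a kernel function appends an operand of the
// (approximate) form !{<func-ref>, !"kernel", i32 1} to the module's
// !nvvm.annotations named metadata node, matching the shape described in the
// comments in setTargetAttributes() above.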
bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
}
//===----------------------------------------------------------------------===//
// SystemZ ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class SystemZABIInfo : public SwiftABIInfo {
bool HasVector;
bool IsSoftFloatABI;
public:
SystemZABIInfo(CodeGenTypes &CGT, bool HV, bool SF)
: SwiftABIInfo(CGT), HasVector(HV), IsSoftFloatABI(SF) {}
bool isPromotableIntegerTypeForABI(QualType Ty) const;
bool isCompoundType(QualType Ty) const;
bool isVectorArgumentType(QualType Ty) const;
bool isFPArgumentType(QualType Ty) const;
QualType GetSingleElementType(QualType Ty) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType ArgTy) const;
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
bool asReturnValue) const override {
return occupiesMoreThan(CGT, scalars, /*total*/ 4);
}
bool isSwiftErrorInRegister() const override {
return false;
}
};
class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
public:
SystemZTargetCodeGenInfo(CodeGenTypes &CGT, bool HasVector, bool SoftFloatABI)
: TargetCodeGenInfo(
std::make_unique<SystemZABIInfo>(CGT, HasVector, SoftFloatABI)) {}
llvm::Value *testFPKind(llvm::Value *V, unsigned BuiltinID,
CGBuilderTy &Builder,
CodeGenModule &CGM) const override {
assert(V->getType()->isFloatingPointTy() && "V should have an FP type.");
// Only use TDC in constrained FP mode.
if (!Builder.getIsFPConstrained())
return nullptr;
llvm::Type *Ty = V->getType();
if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) {
llvm::Module &M = CGM.getModule();
auto &Ctx = M.getContext();
llvm::Function *TDCFunc =
llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::s390_tdc, Ty);
unsigned TDCBits = 0;
switch (BuiltinID) {
case Builtin::BI__builtin_isnan:
TDCBits = 0xf;
break;
case Builtin::BIfinite:
case Builtin::BI__finite:
case Builtin::BIfinitef:
case Builtin::BI__finitef:
case Builtin::BIfinitel:
case Builtin::BI__finitel:
case Builtin::BI__builtin_isfinite:
TDCBits = 0xfc0;
break;
case Builtin::BI__builtin_isinf:
TDCBits = 0x30;
break;
default:
break;
}
if (TDCBits)
return Builder.CreateCall(
TDCFunc,
{V, llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), TDCBits)});
}
return nullptr;
}
};
}
bool SystemZABIInfo::isPromotableIntegerTypeForABI(QualType Ty) const {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Promotable integer types are required to be promoted by the ABI.
if (ABIInfo::isPromotableIntegerTypeForABI(Ty))
return true;
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < 64)
return true;
// 32-bit values must also be promoted.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Int:
case BuiltinType::UInt:
return true;
default:
return false;
}
return false;
}
bool SystemZABIInfo::isCompoundType(QualType Ty) const {
return (Ty->isAnyComplexType() ||
Ty->isVectorType() ||
isAggregateTypeForABI(Ty));
}
bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
return (HasVector &&
Ty->isVectorType() &&
getContext().getTypeSize(Ty) <= 128);
}
bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
if (IsSoftFloatABI)
return false;
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
switch (BT->getKind()) {
case BuiltinType::Float:
case BuiltinType::Double:
return true;
default:
return false;
}
return false;
}
QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
const RecordType *RT = Ty->getAs<RecordType>();
if (RT && RT->isStructureOrClassType()) {
const RecordDecl *RD = RT->getDecl();
QualType Found;
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
for (const auto &I : CXXRD->bases()) {
QualType Base = I.getType();
// Empty bases don't affect things either way.
if (isEmptyRecord(getContext(), Base, true))
continue;
if (!Found.isNull())
return Ty;
Found = GetSingleElementType(Base);
}
// Check the fields.
for (const auto *FD : RD->fields()) {
// Unlike isSingleElementStruct(), empty structure and array fields
// do count. So do anonymous bitfields that aren't zero-sized.
// Like isSingleElementStruct(), ignore C++20 empty data members.
if (FD->hasAttr<NoUniqueAddressAttr>() &&
isEmptyRecord(getContext(), FD->getType(), true))
continue;
// Unlike isSingleElementStruct(), arrays do not count.
// Nested structures still do though.
if (!Found.isNull())
return Ty;
Found = GetSingleElementType(FD->getType());
}
// Unlike isSingleElementStruct(), trailing padding is allowed.
// An 8-byte aligned struct s { float f; } is passed as a double.
if (!Found.isNull())
return Found;
}
return Ty;
}
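// Sketch of how the single-element search above behaves on hypothetical types:
//   struct F1 { float f; };            -> float   (one real field)
//   struct F2 { struct F1 inner; };    -> float   (nested structs are unwrapped)
//   struct F3 { float f; float g; };   -> F3 itself (more than one field)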
Address SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
// Assume that va_list type is correct; should be pointer to LLVM type:
// struct {
// i64 __gpr;
// i64 __fpr;
// i8 *__overflow_arg_area;
// i8 *__reg_save_area;
// };
// Every non-vector argument occupies 8 bytes and is passed by preference
// in either GPRs or FPRs. Vector arguments occupy 8 or 16 bytes and are
// always passed on the stack.
Ty = getContext().getCanonicalType(Ty);
auto TyInfo = getContext().getTypeInfoInChars(Ty);
llvm::Type *ArgTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *DirectTy = ArgTy;
ABIArgInfo AI = classifyArgumentType(Ty);
bool IsIndirect = AI.isIndirect();
bool InFPRs = false;
bool IsVector = false;
CharUnits UnpaddedSize;
CharUnits DirectAlign;
if (IsIndirect) {
DirectTy = llvm::PointerType::getUnqual(DirectTy);
UnpaddedSize = DirectAlign = CharUnits::fromQuantity(8);
} else {
if (AI.getCoerceToType())
ArgTy = AI.getCoerceToType();
InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
IsVector = ArgTy->isVectorTy();
UnpaddedSize = TyInfo.Width;
DirectAlign = TyInfo.Align;
}
CharUnits PaddedSize = CharUnits::fromQuantity(8);
if (IsVector && UnpaddedSize > PaddedSize)
PaddedSize = CharUnits::fromQuantity(16);
assert((UnpaddedSize <= PaddedSize) && "Invalid argument size.");
CharUnits Padding = (PaddedSize - UnpaddedSize);
llvm::Type *IndexTy = CGF.Int64Ty;
llvm::Value *PaddedSizeV =
llvm::ConstantInt::get(IndexTy, PaddedSize.getQuantity());
if (IsVector) {
// Work out the address of a vector argument on the stack.
// Vector arguments are always passed in the high bits of a
// single (8 byte) or double (16 byte) stack slot.
Address OverflowArgAreaPtr =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
Address OverflowArgArea =
Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
CGF.Int8Ty, TyInfo.Align);
Address MemAddr =
CGF.Builder.CreateElementBitCast(OverflowArgArea, DirectTy, "mem_addr");
// Update overflow_arg_area_ptr pointer
llvm::Value *NewOverflowArgArea = CGF.Builder.CreateGEP(
OverflowArgArea.getElementType(), OverflowArgArea.getPointer(),
PaddedSizeV, "overflow_arg_area");
CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
return MemAddr;
}
assert(PaddedSize.getQuantity() == 8);
unsigned MaxRegs, RegCountField, RegSaveIndex;
CharUnits RegPadding;
if (InFPRs) {
MaxRegs = 4; // Maximum of 4 FPR arguments
RegCountField = 1; // __fpr
RegSaveIndex = 16; // save offset for f0
RegPadding = CharUnits(); // floats are passed in the high bits of an FPR
} else {
MaxRegs = 5; // Maximum of 5 GPR arguments
RegCountField = 0; // __gpr
RegSaveIndex = 2; // save offset for r2
RegPadding = Padding; // values are passed in the low bits of a GPR
}
Address RegCountPtr =
CGF.Builder.CreateStructGEP(VAListAddr, RegCountField, "reg_count_ptr");
llvm::Value *RegCount = CGF.Builder.CreateLoad(RegCountPtr, "reg_count");
llvm::Value *MaxRegsV = llvm::ConstantInt::get(IndexTy, MaxRegs);
llvm::Value *InRegs = CGF.Builder.CreateICmpULT(RegCount, MaxRegsV,
"fits_in_regs");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *InMemBlock = CGF.createBasicBlock("vaarg.in_mem");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
CGF.Builder.CreateCondBr(InRegs, InRegBlock, InMemBlock);
// Emit code to load the value if it was passed in registers.
CGF.EmitBlock(InRegBlock);
// Work out the address of an argument register.
llvm::Value *ScaledRegCount =
CGF.Builder.CreateMul(RegCount, PaddedSizeV, "scaled_reg_count");
llvm::Value *RegBase =
llvm::ConstantInt::get(IndexTy, RegSaveIndex * PaddedSize.getQuantity()
+ RegPadding.getQuantity());
llvm::Value *RegOffset =
CGF.Builder.CreateAdd(ScaledRegCount, RegBase, "reg_offset");
Address RegSaveAreaPtr =
CGF.Builder.CreateStructGEP(VAListAddr, 3, "reg_save_area_ptr");
llvm::Value *RegSaveArea =
CGF.Builder.CreateLoad(RegSaveAreaPtr, "reg_save_area");
Address RawRegAddr(
CGF.Builder.CreateGEP(CGF.Int8Ty, RegSaveArea, RegOffset, "raw_reg_addr"),
CGF.Int8Ty, PaddedSize);
Address RegAddr =
CGF.Builder.CreateElementBitCast(RawRegAddr, DirectTy, "reg_addr");
// Update the register count
llvm::Value *One = llvm::ConstantInt::get(IndexTy, 1);
llvm::Value *NewRegCount =
CGF.Builder.CreateAdd(RegCount, One, "reg_count");
CGF.Builder.CreateStore(NewRegCount, RegCountPtr);
CGF.EmitBranch(ContBlock);
// Emit code to load the value if it was passed in memory.
CGF.EmitBlock(InMemBlock);
// Work out the address of a stack argument.
Address OverflowArgAreaPtr =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "overflow_arg_area_ptr");
Address OverflowArgArea =
Address(CGF.Builder.CreateLoad(OverflowArgAreaPtr, "overflow_arg_area"),
CGF.Int8Ty, PaddedSize);
Address RawMemAddr =
CGF.Builder.CreateConstByteGEP(OverflowArgArea, Padding, "raw_mem_addr");
Address MemAddr =
CGF.Builder.CreateElementBitCast(RawMemAddr, DirectTy, "mem_addr");
// Update overflow_arg_area_ptr pointer
llvm::Value *NewOverflowArgArea =
CGF.Builder.CreateGEP(OverflowArgArea.getElementType(),
OverflowArgArea.getPointer(), PaddedSizeV,
"overflow_arg_area");
CGF.Builder.CreateStore(NewOverflowArgArea, OverflowArgAreaPtr);
CGF.EmitBranch(ContBlock);
// Return the appropriate result.
CGF.EmitBlock(ContBlock);
Address ResAddr = emitMergePHI(CGF, RegAddr, InRegBlock, MemAddr, InMemBlock,
"va_arg.addr");
if (IsIndirect)
ResAddr = Address(CGF.Builder.CreateLoad(ResAddr, "indirect_arg"), ArgTy,
TyInfo.Align);
return ResAddr;
}
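// Hypothetical walk-through of the va_arg lowering above (hard-float ABI):
// for a double argument, __fpr is compared against 4; if fewer than 4 FP
// arguments have been consumed, the value is loaded from
//   reg_save_area + 16*8 + __fpr*8
// otherwise it is taken from an 8-byte slot in the overflow area. For a
// 4-byte int the GPR path is used instead, with a base offset of 2*8 plus
// 4 bytes of padding so the value is read from the low bits of the slot.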
ABIArgInfo SystemZABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
if (isVectorArgumentType(RetTy))
return ABIArgInfo::getDirect();
if (isCompoundType(RetTy) || getContext().getTypeSize(RetTy) > 64)
return getNaturalAlignIndirect(RetTy);
return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect());
}
ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
// Handle the generic C++ ABI.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Integers and enums are extended to full register width.
if (isPromotableIntegerTypeForABI(Ty))
return ABIArgInfo::getExtend(Ty);
// Handle vector types and vector-like structure types. Note that
// as opposed to float-like structure types, we do not allow any
// padding for vector-like structures, so verify the sizes match.
uint64_t Size = getContext().getTypeSize(Ty);
QualType SingleElementTy = GetSingleElementType(Ty);
if (isVectorArgumentType(SingleElementTy) &&
getContext().getTypeSize(SingleElementTy) == Size)
return ABIArgInfo::getDirect(CGT.ConvertType(SingleElementTy));
// Values that are not 1, 2, 4 or 8 bytes in size are passed indirectly.
if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// Handle small structures.
if (const RecordType *RT = Ty->getAs<RecordType>()) {
// Structures with flexible arrays have variable length, so they really
// fail the size test above.
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// The structure is passed as an unextended integer, a float, or a double.
llvm::Type *PassTy;
if (isFPArgumentType(SingleElementTy)) {
assert(Size == 32 || Size == 64);
if (Size == 32)
PassTy = llvm::Type::getFloatTy(getVMContext());
else
PassTy = llvm::Type::getDoubleTy(getVMContext());
} else
PassTy = llvm::IntegerType::get(getVMContext(), Size);
return ABIArgInfo::getDirect(PassTy);
}
// Non-structure compounds are passed indirectly.
if (isCompoundType(Ty))
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
return ABIArgInfo::getDirect(nullptr);
}
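// Rough examples (hypothetical C types, hard-float ABI) of the argument rules
// above:
//   struct { float f; }       (32 bits) -> passed directly as float
//   struct { int a; int b; }  (64 bits) -> passed directly as i64
//   struct { char c[3]; }     (24 bits) -> passed indirectly (size is not
//                                          1, 2, 4 or 8 bytes)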
//===----------------------------------------------------------------------===//
// MSP430 ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class MSP430ABIInfo : public DefaultABIInfo {
static ABIArgInfo complexArgInfo() {
ABIArgInfo Info = ABIArgInfo::getDirect();
Info.setCanBeFlattened(false);
return Info;
}
public:
MSP430ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return complexArgInfo();
return DefaultABIInfo::classifyReturnType(RetTy);
}
ABIArgInfo classifyArgumentType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return complexArgInfo();
return DefaultABIInfo::classifyArgumentType(RetTy);
}
// Just copy the original implementations because
// DefaultABIInfo::classify{Return,Argument}Type() are not virtual
void computeInfo(CGFunctionInfo &FI) const override {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type);
}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override {
return EmitVAArgInstr(CGF, VAListAddr, Ty, classifyArgumentType(Ty));
}
};
class MSP430TargetCodeGenInfo : public TargetCodeGenInfo {
public:
MSP430TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<MSP430ABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
}
void MSP430TargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
const auto *InterruptAttr = FD->getAttr<MSP430InterruptAttr>();
if (!InterruptAttr)
return;
// Handle 'interrupt' attribute:
llvm::Function *F = cast<llvm::Function>(GV);
// Step 1: Set ISR calling convention.
F->setCallingConv(llvm::CallingConv::MSP430_INTR);
// Step 2: Add attributes goodness.
F->addFnAttr(llvm::Attribute::NoInline);
F->addFnAttr("interrupt", llvm::utostr(InterruptAttr->getNumber()));
}
}
//===----------------------------------------------------------------------===//
// MIPS ABI Implementation. This works for both little-endian and
// big-endian variants.
//===----------------------------------------------------------------------===//
namespace {
class MipsABIInfo : public ABIInfo {
bool IsO32;
unsigned MinABIStackAlignInBytes, StackAlignInBytes;
void CoerceToIntArgs(uint64_t TySize,
SmallVectorImpl<llvm::Type *> &ArgList) const;
llvm::Type* HandleAggregates(QualType Ty, uint64_t TySize) const;
llvm::Type* returnAggregateInRegs(QualType RetTy, uint64_t Size) const;
llvm::Type* getPaddingType(uint64_t Align, uint64_t Offset) const;
public:
MipsABIInfo(CodeGenTypes &CGT, bool _IsO32) :
ABIInfo(CGT), IsO32(_IsO32), MinABIStackAlignInBytes(IsO32 ? 4 : 8),
StackAlignInBytes(IsO32 ? 8 : 16) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy, uint64_t &Offset) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
ABIArgInfo extendType(QualType Ty) const;
};
class MIPSTargetCodeGenInfo : public TargetCodeGenInfo {
unsigned SizeOfUnwindException;
public:
MIPSTargetCodeGenInfo(CodeGenTypes &CGT, bool IsO32)
: TargetCodeGenInfo(std::make_unique<MipsABIInfo>(CGT, IsO32)),
SizeOfUnwindException(IsO32 ? 24 : 32) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &CGM) const override {
return 29;
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *Fn = cast<llvm::Function>(GV);
if (FD->hasAttr<MipsLongCallAttr>())
Fn->addFnAttr("long-call");
else if (FD->hasAttr<MipsShortCallAttr>())
Fn->addFnAttr("short-call");
// Other attributes do not have a meaning for declarations.
if (GV->isDeclaration())
return;
if (FD->hasAttr<Mips16Attr>()) {
Fn->addFnAttr("mips16");
}
else if (FD->hasAttr<NoMips16Attr>()) {
Fn->addFnAttr("nomips16");
}
if (FD->hasAttr<MicroMipsAttr>())
Fn->addFnAttr("micromips");
else if (FD->hasAttr<NoMicroMipsAttr>())
Fn->addFnAttr("nomicromips");
const MipsInterruptAttr *Attr = FD->getAttr<MipsInterruptAttr>();
if (!Attr)
return;
const char *Kind;
switch (Attr->getInterrupt()) {
case MipsInterruptAttr::eic: Kind = "eic"; break;
case MipsInterruptAttr::sw0: Kind = "sw0"; break;
case MipsInterruptAttr::sw1: Kind = "sw1"; break;
case MipsInterruptAttr::hw0: Kind = "hw0"; break;
case MipsInterruptAttr::hw1: Kind = "hw1"; break;
case MipsInterruptAttr::hw2: Kind = "hw2"; break;
case MipsInterruptAttr::hw3: Kind = "hw3"; break;
case MipsInterruptAttr::hw4: Kind = "hw4"; break;
case MipsInterruptAttr::hw5: Kind = "hw5"; break;
}
Fn->addFnAttr("interrupt", Kind);
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
unsigned getSizeOfUnwindException() const override {
return SizeOfUnwindException;
}
};
}
void MipsABIInfo::CoerceToIntArgs(
uint64_t TySize, SmallVectorImpl<llvm::Type *> &ArgList) const {
llvm::IntegerType *IntTy =
llvm::IntegerType::get(getVMContext(), MinABIStackAlignInBytes * 8);
// Add (TySize / MinABIStackAlignInBytes) args of IntTy.
for (unsigned N = TySize / (MinABIStackAlignInBytes * 8); N; --N)
ArgList.push_back(IntTy);
// If necessary, add one more integer type to ArgList.
unsigned R = TySize % (MinABIStackAlignInBytes * 8);
if (R)
ArgList.push_back(llvm::IntegerType::get(getVMContext(), R));
}
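// Hypothetical example: on O32 (MinABIStackAlignInBytes == 4) a 72-bit
// aggregate is coerced to the argument list { i32, i32, i8 } -- two full
// 32-bit slots plus an i8 for the remaining 8 bits.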
// In N32/64, an aligned double precision floating point field is passed in
// a register.
llvm::Type* MipsABIInfo::HandleAggregates(QualType Ty, uint64_t TySize) const {
SmallVector<llvm::Type*, 8> ArgList, IntArgList;
if (IsO32) {
CoerceToIntArgs(TySize, ArgList);
return llvm::StructType::get(getVMContext(), ArgList);
}
if (Ty->isComplexType())
return CGT.ConvertType(Ty);
const RecordType *RT = Ty->getAs<RecordType>();
// Unions/vectors are passed in integer registers.
if (!RT || !RT->isStructureOrClassType()) {
CoerceToIntArgs(TySize, ArgList);
return llvm::StructType::get(getVMContext(), ArgList);
}
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
assert(!(TySize % 8) && "Size of structure must be multiple of 8.");
uint64_t LastOffset = 0;
unsigned idx = 0;
llvm::IntegerType *I64 = llvm::IntegerType::get(getVMContext(), 64);
// Iterate over fields in the struct/class and check if there are any aligned
// double fields.
for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end();
i != e; ++i, ++idx) {
const QualType Ty = i->getType();
const BuiltinType *BT = Ty->getAs<BuiltinType>();
if (!BT || BT->getKind() != BuiltinType::Double)
continue;
uint64_t Offset = Layout.getFieldOffset(idx);
if (Offset % 64) // Ignore doubles that are not aligned.
continue;
// Add ((Offset - LastOffset) / 64) args of type i64.
for (unsigned j = (Offset - LastOffset) / 64; j > 0; --j)
ArgList.push_back(I64);
// Add double type.
ArgList.push_back(llvm::Type::getDoubleTy(getVMContext()));
LastOffset = Offset + 64;
}
CoerceToIntArgs(TySize - LastOffset, IntArgList);
ArgList.append(IntArgList.begin(), IntArgList.end());
return llvm::StructType::get(getVMContext(), ArgList);
}
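// Hypothetical N32/N64 example for the aggregate handling above:
//   struct { int i; double d; }   (d at offset 64)
// becomes the coerced type { i64, double }: one i64 covers the first 64 bits
// and the aligned double is passed in a floating-point register.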
llvm::Type *MipsABIInfo::getPaddingType(uint64_t OrigOffset,
uint64_t Offset) const {
if (OrigOffset + MinABIStackAlignInBytes > Offset)
return nullptr;
return llvm::IntegerType::get(getVMContext(), (Offset - OrigOffset) * 8);
}
ABIArgInfo
MipsABIInfo::classifyArgumentType(QualType Ty, uint64_t &Offset) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
uint64_t OrigOffset = Offset;
uint64_t TySize = getContext().getTypeSize(Ty);
uint64_t Align = getContext().getTypeAlign(Ty) / 8;
Align = std::min(std::max(Align, (uint64_t)MinABIStackAlignInBytes),
(uint64_t)StackAlignInBytes);
unsigned CurrOffset = llvm::alignTo(Offset, Align);
Offset = CurrOffset + llvm::alignTo(TySize, Align * 8) / 8;
if (isAggregateTypeForABI(Ty) || Ty->isVectorType()) {
// Ignore empty aggregates.
if (TySize == 0)
return ABIArgInfo::getIgnore();
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
Offset = OrigOffset + MinABIStackAlignInBytes;
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
}
// If we have reached here, aggregates are passed directly by coercing to
// another structure type. Padding is inserted if the offset of the
// aggregate is unaligned.
ABIArgInfo ArgInfo =
ABIArgInfo::getDirect(HandleAggregates(Ty, TySize), 0,
getPaddingType(OrigOffset, CurrOffset));
ArgInfo.setInReg(true);
return ArgInfo;
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Make sure we pass indirectly things that are too large.
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 128 ||
(EIT->getNumBits() > 64 &&
!getContext().getTargetInfo().hasInt128Type()))
return getNaturalAlignIndirect(Ty);
// All integral types are promoted to the GPR width.
if (Ty->isIntegralOrEnumerationType())
return extendType(Ty);
return ABIArgInfo::getDirect(
nullptr, 0, IsO32 ? nullptr : getPaddingType(OrigOffset, CurrOffset));
}
llvm::Type*
MipsABIInfo::returnAggregateInRegs(QualType RetTy, uint64_t Size) const {
const RecordType *RT = RetTy->getAs<RecordType>();
SmallVector<llvm::Type*, 8> RTList;
if (RT && RT->isStructureOrClassType()) {
const RecordDecl *RD = RT->getDecl();
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
unsigned FieldCnt = Layout.getFieldCount();
// N32/64 returns struct/classes in floating point registers if the
// following conditions are met:
// 1. The size of the struct/class is no larger than 128-bit.
// 2. The struct/class has one or two fields all of which are floating
// point types.
// 3. The offset of the first field is zero (this follows what gcc does).
//
// Any other composite results are returned in integer registers.
//
if (FieldCnt && (FieldCnt <= 2) && !Layout.getFieldOffset(0)) {
RecordDecl::field_iterator b = RD->field_begin(), e = RD->field_end();
for (; b != e; ++b) {
const BuiltinType *BT = b->getType()->getAs<BuiltinType>();
if (!BT || !BT->isFloatingPoint())
break;
RTList.push_back(CGT.ConvertType(b->getType()));
}
if (b == e)
return llvm::StructType::get(getVMContext(), RTList,
RD->hasAttr<PackedAttr>());
RTList.clear();
}
}
CoerceToIntArgs(Size, RTList);
return llvm::StructType::get(getVMContext(), RTList);
}
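// Hypothetical examples for the rules above: on N32/N64 a
//   struct { float x; double y; }
// (two floating-point fields, the first at offset 0) is returned as the
// struct type { float, double } in floating-point registers, while a
//   struct { int i; float f; }
// falls back to integer registers via CoerceToIntArgs().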
ABIArgInfo MipsABIInfo::classifyReturnType(QualType RetTy) const {
uint64_t Size = getContext().getTypeSize(RetTy);
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
// O32 doesn't treat zero-sized structs differently from other structs.
// However, N32/N64 ignores zero sized return values.
if (!IsO32 && Size == 0)
return ABIArgInfo::getIgnore();
if (isAggregateTypeForABI(RetTy) || RetTy->isVectorType()) {
if (Size <= 128) {
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirect();
// O32 returns integer vectors in registers and N32/N64 returns all small
// aggregates in registers.
if (!IsO32 ||
(RetTy->isVectorType() && !RetTy->hasFloatingRepresentation())) {
ABIArgInfo ArgInfo =
ABIArgInfo::getDirect(returnAggregateInRegs(RetTy, Size));
ArgInfo.setInReg(true);
return ArgInfo;
}
}
return getNaturalAlignIndirect(RetTy);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
// Make sure we pass indirectly things that are too large.
if (const auto *EIT = RetTy->getAs<BitIntType>())
if (EIT->getNumBits() > 128 ||
(EIT->getNumBits() > 64 &&
!getContext().getTargetInfo().hasInt128Type()))
return getNaturalAlignIndirect(RetTy);
if (isPromotableIntegerTypeForABI(RetTy))
return ABIArgInfo::getExtend(RetTy);
if ((RetTy->isUnsignedIntegerOrEnumerationType() ||
RetTy->isSignedIntegerOrEnumerationType()) && Size == 32 && !IsO32)
return ABIArgInfo::getSignExtend(RetTy);
return ABIArgInfo::getDirect();
}
void MipsABIInfo::computeInfo(CGFunctionInfo &FI) const {
ABIArgInfo &RetInfo = FI.getReturnInfo();
if (!getCXXABI().classifyReturnType(FI))
RetInfo = classifyReturnType(FI.getReturnType());
// Check if a pointer to an aggregate is passed as a hidden argument.
uint64_t Offset = RetInfo.isIndirect() ? MinABIStackAlignInBytes : 0;
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, Offset);
}
Address MipsABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType OrigTy) const {
QualType Ty = OrigTy;
// Integer arguments are promoted to 32-bit on O32 and 64-bit on N32/N64.
// Pointers are also promoted in the same way but this only matters for N32.
unsigned SlotSizeInBits = IsO32 ? 32 : 64;
unsigned PtrWidth = getTarget().getPointerWidth(0);
bool DidPromote = false;
if ((Ty->isIntegerType() &&
getContext().getIntWidth(Ty) < SlotSizeInBits) ||
(Ty->isPointerType() && PtrWidth < SlotSizeInBits)) {
DidPromote = true;
Ty = getContext().getIntTypeForBitwidth(SlotSizeInBits,
Ty->isSignedIntegerType());
}
auto TyInfo = getContext().getTypeInfoInChars(Ty);
// The alignment of things in the argument area is never larger than
// StackAlignInBytes.
TyInfo.Align =
std::min(TyInfo.Align, CharUnits::fromQuantity(StackAlignInBytes));
// MinABIStackAlignInBytes is the size of argument slots on the stack.
CharUnits ArgSlotSize = CharUnits::fromQuantity(MinABIStackAlignInBytes);
Address Addr = emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
TyInfo, ArgSlotSize, /*AllowHigherAlign*/ true);
// If there was a promotion, "unpromote" into a temporary.
// TODO: can we just use a pointer into a subset of the original slot?
if (DidPromote) {
Address Temp = CGF.CreateMemTemp(OrigTy, "vaarg.promotion-temp");
llvm::Value *Promoted = CGF.Builder.CreateLoad(Addr);
// Truncate down to the right width.
llvm::Type *IntTy = (OrigTy->isIntegerType() ? Temp.getElementType()
: CGF.IntPtrTy);
llvm::Value *V = CGF.Builder.CreateTrunc(Promoted, IntTy);
if (OrigTy->isPointerType())
V = CGF.Builder.CreateIntToPtr(V, Temp.getElementType());
CGF.Builder.CreateStore(V, Temp);
Addr = Temp;
}
return Addr;
}
ABIArgInfo MipsABIInfo::extendType(QualType Ty) const {
int TySize = getContext().getTypeSize(Ty);
// MIPS64 ABI requires unsigned 32 bit integers to be sign extended.
if (Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
return ABIArgInfo::getSignExtend(Ty);
return ABIArgInfo::getExtend(Ty);
}
bool
MIPSTargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
// This information comes from gcc's implementation, which seems to be
// as canonical as it gets.
// Everything on MIPS is 4 bytes. Double-precision FP registers
// are aliased to pairs of single-precision FP registers.
llvm::Value *Four8 = llvm::ConstantInt::get(CGF.Int8Ty, 4);
// 0-31 are the general purpose registers, $0 - $31.
// 32-63 are the floating-point registers, $f0 - $f31.
// 64 and 65 are the multiply/divide registers, $hi and $lo.
// 66 is the (notional, I think) register for signal-handler return.
AssignToArrayRange(CGF.Builder, Address, Four8, 0, 65);
// 67-74 are the floating-point status registers, $fcc0 - $fcc7.
// They are one bit wide and ignored here.
// 80-111 are the coprocessor 0 registers, $c0r0 - $c0r31.
// (coprocessor 1 is the FP unit)
// 112-143 are the coprocessor 2 registers, $c2r0 - $c2r31.
// 144-175 are the coprocessor 3 registers, $c3r0 - $c3r31.
// 176-181 are the DSP accumulator registers.
AssignToArrayRange(CGF.Builder, Address, Four8, 80, 181);
return false;
}
//===----------------------------------------------------------------------===//
// M68k ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class M68kTargetCodeGenInfo : public TargetCodeGenInfo {
public:
M68kTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<DefaultABIInfo>(CGT)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
} // namespace
void M68kTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (const auto *FD = dyn_cast_or_null<FunctionDecl>(D)) {
if (const auto *attr = FD->getAttr<M68kInterruptAttr>()) {
// Handle 'interrupt' attribute:
llvm::Function *F = cast<llvm::Function>(GV);
// Step 1: Set ISR calling convention.
F->setCallingConv(llvm::CallingConv::M68k_INTR);
// Step 2: Add attributes goodness.
F->addFnAttr(llvm::Attribute::NoInline);
// Step 3: Emit ISR vector alias.
unsigned Num = attr->getNumber() / 2;
llvm::GlobalAlias::create(llvm::Function::ExternalLinkage,
"__isr_" + Twine(Num), F);
}
}
}
//===----------------------------------------------------------------------===//
// AVR ABI Implementation. Documented at
// https://gcc.gnu.org/wiki/avr-gcc#Calling_Convention
// https://gcc.gnu.org/wiki/avr-gcc#Reduced_Tiny
//===----------------------------------------------------------------------===//
namespace {
class AVRABIInfo : public DefaultABIInfo {
private:
// The total number of registers that can be used to pass parameters. It is
// 18 on AVR, or 6 on AVRTiny.
const unsigned ParamRegs;
// The total number of registers that can be used to pass the return value.
// It is 8 on AVR, or 4 on AVRTiny.
const unsigned RetRegs;
public:
AVRABIInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
: DefaultABIInfo(CGT), ParamRegs(NPR), RetRegs(NRR) {}
ABIArgInfo classifyReturnType(QualType Ty, bool &LargeRet) const {
if (isAggregateTypeForABI(Ty)) {
// On AVR, a return struct with size less than or equal to 8 bytes is
// returned directly via registers R18-R25. On AVRTiny, a return struct
// with size less than or equal to 4 bytes is returned directly via
// registers R22-R25.
if (getContext().getTypeSize(Ty) <= RetRegs * 8)
return ABIArgInfo::getDirect();
// A return struct with larger size is returned via a stack
// slot, along with a pointer to it as the function's implicit argument.
LargeRet = true;
return getNaturalAlignIndirect(Ty);
}
// Otherwise we follow the default handling, which is compatible with avr-gcc.
return DefaultABIInfo::classifyReturnType(Ty);
}
ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegs) const {
unsigned TySize = getContext().getTypeSize(Ty);
// An int8 type argument always costs two registers like an int16.
if (TySize == 8 && NumRegs >= 2) {
NumRegs -= 2;
return ABIArgInfo::getExtend(Ty);
}
// If the argument size is an odd number of bytes, round up the size
// to the next even number.
TySize = llvm::alignTo(TySize, 16);
// Any type, including an array or struct type, can be passed in registers
// if there are enough registers left.
if (TySize <= NumRegs * 8) {
NumRegs -= TySize / 8;
return ABIArgInfo::getDirect();
}
// An argument is passed either completely in registers or completely in
// memory. Since there are not enough registers left, the current argument
// and all other unprocessed arguments should be passed in memory.
// However we still need to return `ABIArgInfo::getDirect()` rather than
// `ABIInfo::getNaturalAlignIndirect(Ty)`, otherwise an extra stack slot
// would be allocated and the stack frame layout would be incompatible with
// avr-gcc.
NumRegs = 0;
return ABIArgInfo::getDirect();
}
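// Hypothetical examples of the accounting above on plain AVR (18 parameter
// registers): an 8-bit char argument still costs 2 registers (like an int16);
// a 3-byte struct is rounded up to 4 bytes and costs 4 registers; a 20-byte
// struct exceeds the 18 available registers, so NumRegs is set to 0 and it
// (and all later arguments) go on the stack.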
void computeInfo(CGFunctionInfo &FI) const override {
// Decide the return type.
bool LargeRet = false;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), LargeRet);
// Decide each argument type. The total number of registers that can be used
// for arguments depends on several factors:
// 1. Arguments of varargs functions are passed on the stack. This applies
// even to the named arguments. So no register can be used.
// 2. A total of 18 registers can be used on avr and 6 on avrtiny.
// 3. If the return type is a struct that is too large to fit in registers,
// two registers (out of 18/6) are consumed by the implicit pointer argument.
unsigned NumRegs = ParamRegs;
if (FI.isVariadic())
NumRegs = 0;
else if (LargeRet)
NumRegs -= 2;
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, NumRegs);
}
};
class AVRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AVRTargetCodeGenInfo(CodeGenTypes &CGT, unsigned NPR, unsigned NRR)
: TargetCodeGenInfo(std::make_unique<AVRABIInfo>(CGT, NPR, NRR)) {}
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override {
// Check if global/static variable is defined in address space
// 1~6 (__flash, __flash1, __flash2, __flash3, __flash4, __flash5)
// but not constant.
if (D) {
LangAS AS = D->getType().getAddressSpace();
if (isTargetAddressSpace(AS) && 1 <= toTargetAddressSpace(AS) &&
toTargetAddressSpace(AS) <= 6 && !D->getType().isConstQualified())
CGM.getDiags().Report(D->getLocation(),
diag::err_verify_nonconst_addrspace)
<< "__flash*";
}
return TargetCodeGenInfo::getGlobalVarAddressSpace(CGM, D);
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
if (GV->isDeclaration())
return;
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
auto *Fn = cast<llvm::Function>(GV);
if (FD->getAttr<AVRInterruptAttr>())
Fn->addFnAttr("interrupt");
if (FD->getAttr<AVRSignalAttr>())
Fn->addFnAttr("signal");
}
};
}
//===----------------------------------------------------------------------===//
// TCE ABI Implementation (see http://tce.cs.tut.fi). Uses mostly the defaults.
// Currently subclassed only to implement custom OpenCL C function attribute
// handling.
//===----------------------------------------------------------------------===//
namespace {
class TCETargetCodeGenInfo : public DefaultTargetCodeGenInfo {
public:
TCETargetCodeGenInfo(CodeGenTypes &CGT)
: DefaultTargetCodeGenInfo(CGT) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
};
void TCETargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
llvm::Function *F = cast<llvm::Function>(GV);
if (M.getLangOpts().OpenCL) {
if (FD->hasAttr<OpenCLKernelAttr>()) {
// OpenCL C Kernel functions are not subject to inlining
F->addFnAttr(llvm::Attribute::NoInline);
const ReqdWorkGroupSizeAttr *Attr = FD->getAttr<ReqdWorkGroupSizeAttr>();
if (Attr) {
// Convert the reqd_work_group_size() attributes to metadata.
llvm::LLVMContext &Context = F->getContext();
llvm::NamedMDNode *OpenCLMetadata =
M.getModule().getOrInsertNamedMetadata(
"opencl.kernel_wg_size_info");
SmallVector<llvm::Metadata *, 5> Operands;
Operands.push_back(llvm::ConstantAsMetadata::get(F));
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
M.Int32Ty, llvm::APInt(32, Attr->getXDim()))));
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
M.Int32Ty, llvm::APInt(32, Attr->getYDim()))));
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::Constant::getIntegerValue(
M.Int32Ty, llvm::APInt(32, Attr->getZDim()))));
// Add a boolean constant operand for "required" (true) or "hint"
// (false) for implementing the work_group_size_hint attr later.
// Currently always true as the hint is not yet implemented.
Operands.push_back(
llvm::ConstantAsMetadata::get(llvm::ConstantInt::getTrue(Context)));
OpenCLMetadata->addOperand(llvm::MDNode::get(Context, Operands));
}
}
}
}
}
//===----------------------------------------------------------------------===//
// Hexagon ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class HexagonABIInfo : public DefaultABIInfo {
public:
HexagonABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy, unsigned *RegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
Address EmitVAArgFromMemory(CodeGenFunction &CFG, Address VAListAddr,
QualType Ty) const;
Address EmitVAArgForHexagon(CodeGenFunction &CFG, Address VAListAddr,
QualType Ty) const;
Address EmitVAArgForHexagonLinux(CodeGenFunction &CFG, Address VAListAddr,
QualType Ty) const;
};
class HexagonTargetCodeGenInfo : public TargetCodeGenInfo {
public:
HexagonTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<HexagonABIInfo>(CGT)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 29;
}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &GCM) const override {
if (GV->isDeclaration())
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
}
};
} // namespace
void HexagonABIInfo::computeInfo(CGFunctionInfo &FI) const {
unsigned RegsLeft = 6;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, &RegsLeft);
}
static bool HexagonAdjustRegsLeft(uint64_t Size, unsigned *RegsLeft) {
assert(Size <= 64 && "Not expecting to pass arguments larger than 64 bits"
" through registers");
if (*RegsLeft == 0)
return false;
if (Size <= 32) {
(*RegsLeft)--;
return true;
}
if (2 <= (*RegsLeft & (~1U))) {
*RegsLeft = (*RegsLeft & (~1U)) - 2;
return true;
}
// The next available register was r5 but the candidate was larger than 32
// bits, so it has to go on the stack. However, we still consume r5.
if (*RegsLeft == 1)
*RegsLeft = 0;
return false;
}
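// Hypothetical walk-through of the pairing logic above: with 3 registers left,
// a 64-bit candidate needs an aligned register pair, so one register is
// skipped and the pair is consumed, leaving 0; with only 1 register left the
// candidate goes on the stack but that last register is still consumed (see
// the comment above about r5).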
ABIArgInfo HexagonABIInfo::classifyArgumentType(QualType Ty,
unsigned *RegsLeft) const {
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 64)
HexagonAdjustRegsLeft(Size, RegsLeft);
if (Size > 64 && Ty->isBitIntType())
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
return isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
: ABIArgInfo::getDirect();
}
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Ignore empty records.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
unsigned Align = getContext().getTypeAlign(Ty);
if (Size > 64)
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
if (HexagonAdjustRegsLeft(Size, RegsLeft))
Align = Size <= 32 ? 32 : 64;
if (Size <= Align) {
// Pass in the smallest viable integer type.
if (!llvm::isPowerOf2_64(Size))
Size = llvm::NextPowerOf2(Size);
return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
}
return DefaultABIInfo::classifyArgumentType(Ty);
}
ABIArgInfo HexagonABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
const TargetInfo &T = CGT.getTarget();
uint64_t Size = getContext().getTypeSize(RetTy);
if (RetTy->getAs<VectorType>()) {
// HVX vectors are returned in vector registers or register pairs.
if (T.hasFeature("hvx")) {
assert(T.hasFeature("hvx-length64b") || T.hasFeature("hvx-length128b"));
uint64_t VecSize = T.hasFeature("hvx-length64b") ? 64*8 : 128*8;
if (Size == VecSize || Size == 2*VecSize)
return ABIArgInfo::getDirectInReg();
}
// Large vector types should be returned via memory.
if (Size > 64)
return getNaturalAlignIndirect(RetTy);
}
if (!isAggregateTypeForABI(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
if (Size > 64 && RetTy->isBitIntType())
return getNaturalAlignIndirect(RetTy, /*ByVal=*/false);
return isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
: ABIArgInfo::getDirect();
}
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Aggregates <= 8 bytes are returned in registers; other aggregates
// are returned indirectly.
if (Size <= 64) {
// Return in the smallest viable integer type.
if (!llvm::isPowerOf2_64(Size))
Size = llvm::NextPowerOf2(Size);
return ABIArgInfo::getDirect(llvm::Type::getIntNTy(getVMContext(), Size));
}
return getNaturalAlignIndirect(RetTy, /*ByVal=*/true);
}
Address HexagonABIInfo::EmitVAArgFromMemory(CodeGenFunction &CGF,
Address VAListAddr,
QualType Ty) const {
// Load the overflow area pointer.
Address __overflow_area_pointer_p =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
__overflow_area_pointer_p, "__overflow_area_pointer");
uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
if (Align > 4) {
// Alignment should be a power of 2.
assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2!");
// overflow_arg_area = (overflow_arg_area + align - 1) & -align;
llvm::Value *Offset = llvm::ConstantInt::get(CGF.Int64Ty, Align - 1);
// Add offset to the current pointer to access the argument.
__overflow_area_pointer =
CGF.Builder.CreateGEP(CGF.Int8Ty, __overflow_area_pointer, Offset);
llvm::Value *AsInt =
CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
// Create a mask which should be "AND"ed
// with (overflow_arg_area + align - 1)
llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -(int)Align);
__overflow_area_pointer = CGF.Builder.CreateIntToPtr(
CGF.Builder.CreateAnd(AsInt, Mask), __overflow_area_pointer->getType(),
"__overflow_area_pointer.align");
}
// Get the type of the argument from memory and bitcast
// overflow area pointer to the argument type.
llvm::Type *PTy = CGF.ConvertTypeForMem(Ty);
Address AddrTyped = CGF.Builder.CreateElementBitCast(
Address(__overflow_area_pointer, CGF.Int8Ty,
CharUnits::fromQuantity(Align)),
PTy);
// Round up to the minimum stack alignment for varargs which is 4 bytes.
uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
__overflow_area_pointer = CGF.Builder.CreateGEP(
CGF.Int8Ty, __overflow_area_pointer,
llvm::ConstantInt::get(CGF.Int32Ty, Offset),
"__overflow_area_pointer.next");
CGF.Builder.CreateStore(__overflow_area_pointer, __overflow_area_pointer_p);
return AddrTyped;
}
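// Quick arithmetic check of the rounding used above (illustrative only): with
// Align = 8 and an overflow-area pointer of 0x1004, (0x1004 + 7) & -8 = 0x1008,
// i.e. the pointer is bumped to the next 8-byte boundary, while an already
// aligned pointer such as 0x1008 is left unchanged.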
Address HexagonABIInfo::EmitVAArgForHexagon(CodeGenFunction &CGF,
Address VAListAddr,
QualType Ty) const {
// FIXME: Need to handle alignment
llvm::Type *BP = CGF.Int8PtrTy;
CGBuilderTy &Builder = CGF.Builder;
Address VAListAddrAsBPP = Builder.CreateElementBitCast(VAListAddr, BP, "ap");
llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
// Handle address alignment for type alignment > 32 bits
uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
if (TyAlign > 4) {
assert((TyAlign & (TyAlign - 1)) == 0 && "Alignment is not power of 2!");
llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
}
Address AddrTyped = Builder.CreateElementBitCast(
Address(Addr, CGF.Int8Ty, CharUnits::fromQuantity(TyAlign)),
CGF.ConvertType(Ty));
uint64_t Offset = llvm::alignTo(CGF.getContext().getTypeSize(Ty) / 8, 4);
llvm::Value *NextAddr = Builder.CreateGEP(
CGF.Int8Ty, Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset), "ap.next");
Builder.CreateStore(NextAddr, VAListAddrAsBPP);
return AddrTyped;
}
Address HexagonABIInfo::EmitVAArgForHexagonLinux(CodeGenFunction &CGF,
Address VAListAddr,
QualType Ty) const {
int ArgSize = CGF.getContext().getTypeSize(Ty) / 8;
if (ArgSize > 8)
return EmitVAArgFromMemory(CGF, VAListAddr, Ty);
// Here we have to check whether the argument is in the register area or
// in the overflow area.
// If the saved register area pointer + argsize rounded up to alignment >
// saved register area end pointer, argument is in overflow area.
unsigned RegsLeft = 6;
Ty = CGF.getContext().getCanonicalType(Ty);
(void)classifyArgumentType(Ty, &RegsLeft);
llvm::BasicBlock *MaybeRegBlock = CGF.createBasicBlock("vaarg.maybe_reg");
llvm::BasicBlock *InRegBlock = CGF.createBasicBlock("vaarg.in_reg");
llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
// Get the rounded size of the argument. GCC does not allow varargs of
// size < 4 bytes; we follow the same logic here.
ArgSize = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
int ArgAlign = (CGF.getContext().getTypeSize(Ty) <= 32) ? 4 : 8;
// Argument may be in saved register area
CGF.EmitBlock(MaybeRegBlock);
// Load the current saved register area pointer.
Address __current_saved_reg_area_pointer_p = CGF.Builder.CreateStructGEP(
VAListAddr, 0, "__current_saved_reg_area_pointer_p");
llvm::Value *__current_saved_reg_area_pointer = CGF.Builder.CreateLoad(
__current_saved_reg_area_pointer_p, "__current_saved_reg_area_pointer");
// Load the saved register area end pointer.
Address __saved_reg_area_end_pointer_p = CGF.Builder.CreateStructGEP(
VAListAddr, 1, "__saved_reg_area_end_pointer_p");
llvm::Value *__saved_reg_area_end_pointer = CGF.Builder.CreateLoad(
__saved_reg_area_end_pointer_p, "__saved_reg_area_end_pointer");
// If the size of argument is > 4 bytes, check if the stack
// location is aligned to 8 bytes
if (ArgAlign > 4) {
llvm::Value *__current_saved_reg_area_pointer_int =
CGF.Builder.CreatePtrToInt(__current_saved_reg_area_pointer,
CGF.Int32Ty);
__current_saved_reg_area_pointer_int = CGF.Builder.CreateAdd(
__current_saved_reg_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, (ArgAlign - 1)),
"align_current_saved_reg_area_pointer");
__current_saved_reg_area_pointer_int =
CGF.Builder.CreateAnd(__current_saved_reg_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
"align_current_saved_reg_area_pointer");
__current_saved_reg_area_pointer =
CGF.Builder.CreateIntToPtr(__current_saved_reg_area_pointer_int,
__current_saved_reg_area_pointer->getType(),
"align_current_saved_reg_area_pointer");
}
llvm::Value *__new_saved_reg_area_pointer =
CGF.Builder.CreateGEP(CGF.Int8Ty, __current_saved_reg_area_pointer,
llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
"__new_saved_reg_area_pointer");
llvm::Value *UsingStack = nullptr;
UsingStack = CGF.Builder.CreateICmpSGT(__new_saved_reg_area_pointer,
__saved_reg_area_end_pointer);
CGF.Builder.CreateCondBr(UsingStack, OnStackBlock, InRegBlock);
// Argument in saved register area
// Implement the block where argument is in register saved area
CGF.EmitBlock(InRegBlock);
llvm::Type *PTy = CGF.ConvertType(Ty);
llvm::Value *__saved_reg_area_p = CGF.Builder.CreateBitCast(
__current_saved_reg_area_pointer, llvm::PointerType::getUnqual(PTy));
CGF.Builder.CreateStore(__new_saved_reg_area_pointer,
__current_saved_reg_area_pointer_p);
CGF.EmitBranch(ContBlock);
// Argument in overflow area
// Implement the block where the argument is in overflow area.
CGF.EmitBlock(OnStackBlock);
// Load the overflow area pointer
Address __overflow_area_pointer_p =
CGF.Builder.CreateStructGEP(VAListAddr, 2, "__overflow_area_pointer_p");
llvm::Value *__overflow_area_pointer = CGF.Builder.CreateLoad(
__overflow_area_pointer_p, "__overflow_area_pointer");
// Align the overflow area pointer according to the alignment of the argument
if (ArgAlign > 4) {
llvm::Value *__overflow_area_pointer_int =
CGF.Builder.CreatePtrToInt(__overflow_area_pointer, CGF.Int32Ty);
__overflow_area_pointer_int =
CGF.Builder.CreateAdd(__overflow_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, ArgAlign - 1),
"align_overflow_area_pointer");
__overflow_area_pointer_int =
CGF.Builder.CreateAnd(__overflow_area_pointer_int,
llvm::ConstantInt::get(CGF.Int32Ty, -ArgAlign),
"align_overflow_area_pointer");
__overflow_area_pointer = CGF.Builder.CreateIntToPtr(
__overflow_area_pointer_int, __overflow_area_pointer->getType(),
"align_overflow_area_pointer");
}
// Get the pointer for next argument in overflow area and store it
// to overflow area pointer.
llvm::Value *__new_overflow_area_pointer = CGF.Builder.CreateGEP(
CGF.Int8Ty, __overflow_area_pointer,
llvm::ConstantInt::get(CGF.Int32Ty, ArgSize),
"__overflow_area_pointer.next");
CGF.Builder.CreateStore(__new_overflow_area_pointer,
__overflow_area_pointer_p);
CGF.Builder.CreateStore(__new_overflow_area_pointer,
__current_saved_reg_area_pointer_p);
// Bitcast the overflow area pointer to the type of argument.
llvm::Type *OverflowPTy = CGF.ConvertTypeForMem(Ty);
llvm::Value *__overflow_area_p = CGF.Builder.CreateBitCast(
__overflow_area_pointer, llvm::PointerType::getUnqual(OverflowPTy));
CGF.EmitBranch(ContBlock);
// Get the correct pointer to load the variable argument
// Implement the ContBlock
CGF.EmitBlock(ContBlock);
llvm::Type *MemTy = CGF.ConvertTypeForMem(Ty);
llvm::Type *MemPTy = llvm::PointerType::getUnqual(MemTy);
llvm::PHINode *ArgAddr = CGF.Builder.CreatePHI(MemPTy, 2, "vaarg.addr");
ArgAddr->addIncoming(__saved_reg_area_p, InRegBlock);
ArgAddr->addIncoming(__overflow_area_p, OnStackBlock);
return Address(ArgAddr, MemTy, CharUnits::fromQuantity(ArgAlign));
}
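// For reference, the code above assumes a Hexagon/musl va_list record with
// three pointer fields, a sketch inferred from the struct GEP indices (the tag
// name below is illustrative, not a definition used elsewhere):
//
//   struct __va_list_tag {
//     void *__current_saved_reg_area_pointer; // field 0
//     void *__saved_reg_area_end_pointer;     // field 1
//     void *__overflow_area_pointer;          // field 2
//   };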
Address HexagonABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
if (getTarget().getTriple().isMusl())
return EmitVAArgForHexagonLinux(CGF, VAListAddr, Ty);
return EmitVAArgForHexagon(CGF, VAListAddr, Ty);
}
//===----------------------------------------------------------------------===//
// Lanai ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class LanaiABIInfo : public DefaultABIInfo {
public:
LanaiABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
bool shouldUseInReg(QualType Ty, CCState &State) const;
void computeInfo(CGFunctionInfo &FI) const override {
CCState State(FI);
// Lanai uses 4 registers to pass arguments unless the function has the
// regparm attribute set.
if (FI.getHasRegParm()) {
State.FreeRegs = FI.getRegParm();
} else {
State.FreeRegs = 4;
}
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, State);
}
ABIArgInfo getIndirectResult(QualType Ty, bool ByVal, CCState &State) const;
ABIArgInfo classifyArgumentType(QualType Ty, CCState &State) const;
};
} // end anonymous namespace
bool LanaiABIInfo::shouldUseInReg(QualType Ty, CCState &State) const {
unsigned Size = getContext().getTypeSize(Ty);
unsigned SizeInRegs = llvm::alignTo(Size, 32U) / 32U;
if (SizeInRegs == 0)
return false;
if (SizeInRegs > State.FreeRegs) {
State.FreeRegs = 0;
return false;
}
State.FreeRegs -= SizeInRegs;
return true;
}
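// Worked example (illustrative): with State.FreeRegs = 4, an i64 argument
// needs alignTo(64, 32) / 32 = 2 registers and leaves 2 free; a subsequent
// 96-bit argument would need 3 registers, which exceeds the 2 remaining, so
// FreeRegs is zeroed and the argument is not passed in registers.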
ABIArgInfo LanaiABIInfo::getIndirectResult(QualType Ty, bool ByVal,
CCState &State) const {
if (!ByVal) {
if (State.FreeRegs) {
--State.FreeRegs; // Non-byval indirects just use one pointer.
return getNaturalAlignIndirectInReg(Ty);
}
return getNaturalAlignIndirect(Ty, false);
}
// Compute the byval alignment.
const unsigned MinABIStackAlignInBytes = 4;
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
/*Realign=*/TypeAlign >
MinABIStackAlignInBytes);
}
ABIArgInfo LanaiABIInfo::classifyArgumentType(QualType Ty,
CCState &State) const {
// Check with the C++ ABI first.
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect) {
return getIndirectResult(Ty, /*ByVal=*/false, State);
} else if (RAA == CGCXXABI::RAA_DirectInMemory) {
return getNaturalAlignIndirect(Ty, /*ByVal=*/true);
}
}
if (isAggregateTypeForABI(Ty)) {
// Structures with flexible arrays are always indirect.
if (RT && RT->getDecl()->hasFlexibleArrayMember())
return getIndirectResult(Ty, /*ByVal=*/true, State);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
if (SizeInRegs <= State.FreeRegs) {
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
State.FreeRegs -= SizeInRegs;
return ABIArgInfo::getDirectInReg(Result);
} else {
State.FreeRegs = 0;
}
return getIndirectResult(Ty, true, State);
}
// Treat an enum type as its underlying type.
if (const auto *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
bool InReg = shouldUseInReg(Ty, State);
// Don't pass >64 bit integers in registers.
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getIndirectResult(Ty, /*ByVal=*/true, State);
if (isPromotableIntegerTypeForABI(Ty)) {
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getExtend(Ty);
}
if (InReg)
return ABIArgInfo::getDirectInReg();
return ABIArgInfo::getDirect();
}
namespace {
class LanaiTargetCodeGenInfo : public TargetCodeGenInfo {
public:
LanaiTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<LanaiABIInfo>(CGT)) {}
};
}
//===----------------------------------------------------------------------===//
// AMDGPU ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class AMDGPUABIInfo final : public DefaultABIInfo {
private:
static const unsigned MaxNumRegsForArgsRet = 16;
unsigned numRegsForType(QualType Ty) const;
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const override;
// Coerce HIP scalar pointer arguments from generic pointers to global ones.
llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
unsigned ToAS) const {
// Single value types.
auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
if (PtrTy && PtrTy->getAddressSpace() == FromAS)
return llvm::PointerType::getWithSamePointeeType(PtrTy, ToAS);
return Ty;
}
public:
explicit AMDGPUABIInfo(CodeGen::CodeGenTypes &CGT) :
DefaultABIInfo(CGT) {}
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
ABIArgInfo classifyArgumentType(QualType Ty, unsigned &NumRegsLeft) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
bool AMDGPUABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
return true;
}
bool AMDGPUABIInfo::isHomogeneousAggregateSmallEnough(
const Type *Base, uint64_t Members) const {
uint32_t NumRegs = (getContext().getTypeSize(Base) + 31) / 32;
// Homogeneous Aggregates may occupy at most 16 registers.
return Members * NumRegs <= MaxNumRegsForArgsRet;
}
/// Estimate number of registers the type will use when passed in registers.
unsigned AMDGPUABIInfo::numRegsForType(QualType Ty) const {
unsigned NumRegs = 0;
if (const VectorType *VT = Ty->getAs<VectorType>()) {
// Compute from the number of elements. The reported size is based on the
// in-memory size, which includes the padding 4th element for 3-vectors.
QualType EltTy = VT->getElementType();
unsigned EltSize = getContext().getTypeSize(EltTy);
// 16-bit element vectors should be passed as packed.
if (EltSize == 16)
return (VT->getNumElements() + 1) / 2;
unsigned EltNumRegs = (EltSize + 31) / 32;
return EltNumRegs * VT->getNumElements();
}
if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
assert(!RD->hasFlexibleArrayMember());
for (const FieldDecl *Field : RD->fields()) {
QualType FieldTy = Field->getType();
NumRegs += numRegsForType(FieldTy);
}
return NumRegs;
}
return (getContext().getTypeSize(Ty) + 31) / 32;
}
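// Rough register-count examples for the estimate above (illustrative): a
// <4 x float> vector costs 4 registers, an <8 x half> vector is packed two
// elements per register for (8 + 1) / 2 = 4 registers, and a struct of one
// float and one double costs 1 + 2 = 3 registers.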
void AMDGPUABIInfo::computeInfo(CGFunctionInfo &FI) const {
llvm::CallingConv::ID CC = FI.getCallingConvention();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
unsigned NumRegsLeft = MaxNumRegsForArgsRet;
for (auto &Arg : FI.arguments()) {
if (CC == llvm::CallingConv::AMDGPU_KERNEL) {
Arg.info = classifyKernelArgumentType(Arg.type);
} else {
Arg.info = classifyArgumentType(Arg.type, NumRegsLeft);
}
}
}
Address AMDGPUABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
llvm_unreachable("AMDGPU does not support varargs");
}
ABIArgInfo AMDGPUABIInfo::classifyReturnType(QualType RetTy) const {
if (isAggregateTypeForABI(RetTy)) {
// Records with non-trivial destructors/copy-constructors should not be
// returned by value.
if (!getRecordArgABI(RetTy, getCXXABI())) {
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), RetTy, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just return a regular value.
if (const Type *SeltTy = isSingleElementStruct(RetTy, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
if (const RecordType *RT = RetTy->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyReturnType(RetTy);
}
// Pack aggregates <= 8 bytes into a single VGPR or a register pair.
uint64_t Size = getContext().getTypeSize(RetTy);
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
if (Size <= 64) {
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
}
if (numRegsForType(RetTy) <= MaxNumRegsForArgsRet)
return ABIArgInfo::getDirect();
}
}
// Otherwise just do the default thing.
return DefaultABIInfo::classifyReturnType(RetTy);
}
/// For kernels all parameters are really passed in a special buffer. It doesn't
/// make sense to pass anything byval, so everything must be direct.
ABIArgInfo AMDGPUABIInfo::classifyKernelArgumentType(QualType Ty) const {
Ty = useFirstFieldIfTransparentUnion(Ty);
// TODO: Can we omit empty structs?
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
Ty = QualType(SeltTy, 0);
llvm::Type *OrigLTy = CGT.ConvertType(Ty);
llvm::Type *LTy = OrigLTy;
if (getContext().getLangOpts().HIP) {
LTy = coerceKernelArgumentType(
OrigLTy, /*FromAS=*/getContext().getTargetAddressSpace(LangAS::Default),
/*ToAS=*/getContext().getTargetAddressSpace(LangAS::cuda_device));
}
// FIXME: Should also use this for OpenCL, but it requires addressing the
// problem of kernels being called.
//
// FIXME: This doesn't apply the optimization of coercing pointers in structs
// to global address space when using byref. This would require implementing a
// new kind of coercion of the in-memory type for indirect arguments.
if (!getContext().getLangOpts().OpenCL && LTy == OrigLTy &&
isAggregateTypeForABI(Ty)) {
return ABIArgInfo::getIndirectAliased(
getContext().getTypeAlignInChars(Ty),
getContext().getTargetAddressSpace(LangAS::opencl_constant),
false /*Realign*/, nullptr /*Padding*/);
}
// If we set CanBeFlattened to true, CodeGen will expand the struct to its
// individual elements, which confuses the Clover OpenCL backend; therefore we
// have to set it to false here. Other args of getDirect() are just defaults.
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
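// Concrete sketch of the HIP coercion above (assuming the usual AMDGPU
// numbering where the generic/default address space is 0 and the global
// device address space is 1): a kernel parameter declared as 'float *' is
// lowered with the IR type 'float addrspace(1)*' instead of the generic
// 'float *', while an aggregate parameter whose IR type is unchanged is
// passed indirectly in the constant address space via getIndirectAliased.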
ABIArgInfo AMDGPUABIInfo::classifyArgumentType(QualType Ty,
unsigned &NumRegsLeft) const {
assert(NumRegsLeft <= MaxNumRegsForArgsRet && "register estimate underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
if (isAggregateTypeForABI(Ty)) {
// Records with non-trivial destructors/copy-constructors should not be
// passed by value.
if (auto RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
// Lower single-element structs to just pass a regular value. TODO: We
// could do reasonable-size multiple-element structs too, using getExpand(),
// though watch out for things like bitfields.
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
if (const RecordType *RT = Ty->getAs<RecordType>()) {
const RecordDecl *RD = RT->getDecl();
if (RD->hasFlexibleArrayMember())
return DefaultABIInfo::classifyArgumentType(Ty);
}
// Pack aggregates <= 8 bytes into a single VGPR or a register pair.
uint64_t Size = getContext().getTypeSize(Ty);
if (Size <= 64) {
unsigned NumRegs = (Size + 31) / 32;
NumRegsLeft -= std::min(NumRegsLeft, NumRegs);
if (Size <= 16)
return ABIArgInfo::getDirect(llvm::Type::getInt16Ty(getVMContext()));
if (Size <= 32)
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
// XXX: Should this be i64 instead, and should the limit increase?
llvm::Type *I32Ty = llvm::Type::getInt32Ty(getVMContext());
return ABIArgInfo::getDirect(llvm::ArrayType::get(I32Ty, 2));
}
if (NumRegsLeft > 0) {
unsigned NumRegs = numRegsForType(Ty);
if (NumRegsLeft >= NumRegs) {
NumRegsLeft -= NumRegs;
return ABIArgInfo::getDirect();
}
}
}
// Otherwise just do the default thing.
ABIArgInfo ArgInfo = DefaultABIInfo::classifyArgumentType(Ty);
if (!ArgInfo.isIndirect()) {
unsigned NumRegs = numRegsForType(Ty);
NumRegsLeft -= std::min(NumRegs, NumRegsLeft);
}
return ArgInfo;
}
class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
public:
AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<AMDGPUABIInfo>(CGT)) {}
void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
CodeGenModule &CGM) const;
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &M) const override;
unsigned getOpenCLKernelCallingConv() const override;
llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
llvm::PointerType *T, QualType QT) const override;
LangAS getASTAllocaAddressSpace() const override {
return getLangASFromTargetAS(
getABIInfo().getDataLayout().getAllocaAddrSpace());
}
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
llvm::Function *
createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Type *BlockTy) const override;
bool shouldEmitStaticExternCAliases() const override;
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
}
static bool requiresAMDGPUProtectedVisibility(const Decl *D,
llvm::GlobalValue *GV) {
if (GV->getVisibility() != llvm::GlobalValue::HiddenVisibility)
return false;
return D->hasAttr<OpenCLKernelAttr>() ||
(isa<FunctionDecl>(D) && D->hasAttr<CUDAGlobalAttr>()) ||
(isa<VarDecl>(D) &&
(D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinSurfaceType() ||
cast<VarDecl>(D)->getType()->isCUDADeviceBuiltinTextureType()));
}
void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const {
const auto *ReqdWGS =
M.getLangOpts().OpenCL ? FD->getAttr<ReqdWorkGroupSizeAttr>() : nullptr;
const bool IsOpenCLKernel =
M.getLangOpts().OpenCL && FD->hasAttr<OpenCLKernelAttr>();
const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr<CUDAGlobalAttr>();
const auto *FlatWGS = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>();
if (ReqdWGS || FlatWGS) {
unsigned Min = 0;
unsigned Max = 0;
if (FlatWGS) {
Min = FlatWGS->getMin()
->EvaluateKnownConstInt(M.getContext())
.getExtValue();
Max = FlatWGS->getMax()
->EvaluateKnownConstInt(M.getContext())
.getExtValue();
}
if (ReqdWGS && Min == 0 && Max == 0)
Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim();
if (Min != 0) {
assert(Min <= Max && "Min must be less than or equal Max");
std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
} else if (IsOpenCLKernel || IsHIPKernel) {
// By default, restrict the maximum size to a value specified by
// --gpu-max-threads-per-block=n or its default value for HIP.
const unsigned OpenCLDefaultMaxWorkGroupSize = 256;
const unsigned DefaultMaxWorkGroupSize =
IsOpenCLKernel ? OpenCLDefaultMaxWorkGroupSize
: M.getLangOpts().GPUMaxThreadsPerBlock;
std::string AttrVal =
std::string("1,") + llvm::utostr(DefaultMaxWorkGroupSize);
F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
}
if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
unsigned Min =
Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue();
unsigned Max = Attr->getMax() ? Attr->getMax()
->EvaluateKnownConstInt(M.getContext())
.getExtValue()
: 0;
if (Min != 0) {
assert((Max == 0 || Min <= Max) && "Min must be less than or equal to Max");
std::string AttrVal = llvm::utostr(Min);
if (Max != 0)
AttrVal = AttrVal + "," + llvm::utostr(Max);
F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
}
if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
unsigned NumSGPR = Attr->getNumSGPR();
if (NumSGPR != 0)
F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
}
if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
uint32_t NumVGPR = Attr->getNumVGPR();
if (NumVGPR != 0)
F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
}
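// Example of the resulting attributes (illustrative): an OpenCL kernel
// declared with reqd_work_group_size(8, 8, 4) and no explicit flat-work-group
// or waves-per-eu attributes gets Min = Max = 8 * 8 * 4 = 256, i.e.
// "amdgpu-flat-work-group-size"="256,256"; a plain HIP kernel with no
// attributes instead gets "1,<GPUMaxThreadsPerBlock>".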
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
if (requiresAMDGPUProtectedVisibility(D, GV)) {
GV->setVisibility(llvm::GlobalValue::ProtectedVisibility);
GV->setDSOLocal(true);
}
if (GV->isDeclaration())
return;
llvm::Function *F = dyn_cast<llvm::Function>(GV);
if (!F)
return;
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (FD)
setFunctionDeclAttributes(FD, F, M);
const bool IsHIPKernel =
M.getLangOpts().HIP && FD && FD->hasAttr<CUDAGlobalAttr>();
if (IsHIPKernel)
F->addFnAttr("uniform-work-group-size", "true");
if (M.getContext().getTargetInfo().allowAMDGPUUnsafeFPAtomics())
F->addFnAttr("amdgpu-unsafe-fp-atomics", "true");
if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts)
F->addFnAttr("amdgpu-ieee", "false");
}
unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::AMDGPU_KERNEL;
}
// Currently LLVM assumes null pointers always have value 0,
// which results in incorrectly transformed IR. Therefore, instead of
// emitting null pointers in the private and local address spaces, a null
// pointer in the generic address space is emitted and then address-space
// cast to a pointer in the local or private address space.
llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
const CodeGen::CodeGenModule &CGM, llvm::PointerType *PT,
QualType QT) const {
if (CGM.getContext().getTargetNullPointerValue(QT) == 0)
return llvm::ConstantPointerNull::get(PT);
auto &Ctx = CGM.getContext();
auto NPT = llvm::PointerType::getWithSamePointeeType(
PT, Ctx.getTargetAddressSpace(LangAS::opencl_generic));
return llvm::ConstantExpr::getAddrSpaceCast(
llvm::ConstantPointerNull::get(NPT), PT);
}
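// Illustrative IR for the cast above (a sketch assuming the usual AMDGPU
// numbering where generic is address space 0 and private is address space 5;
// the actual non-zero null value comes from getTargetNullPointerValue): a null
// 'private' pointer is emitted as
//   addrspacecast (i8* null to i8 addrspace(5)*)
// rather than as 'i8 addrspace(5)* null', so it is not folded to the value 0.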
LangAS
AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const {
assert(!CGM.getLangOpts().OpenCL &&
!(CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) &&
"Address space agnostic languages only");
LangAS DefaultGlobalAS = getLangASFromTargetAS(
CGM.getContext().getTargetAddressSpace(LangAS::opencl_global));
if (!D)
return DefaultGlobalAS;
LangAS AddrSpace = D->getType().getAddressSpace();
assert(AddrSpace == LangAS::Default || isTargetAddressSpace(AddrSpace));
if (AddrSpace != LangAS::Default)
return AddrSpace;
// Only promote to address space 4 if VarDecl has constant initialization.
if (CGM.isTypeConstant(D->getType(), false) &&
D->hasConstantInitialization()) {
if (auto ConstAS = CGM.getTarget().getConstantAddressSpace())
return *ConstAS;
}
return DefaultGlobalAS;
}
llvm::SyncScope::ID
AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const {
std::string Name;
switch (Scope) {
case SyncScope::HIPSingleThread:
Name = "singlethread";
break;
case SyncScope::HIPWavefront:
case SyncScope::OpenCLSubGroup:
Name = "wavefront";
break;
case SyncScope::HIPWorkgroup:
case SyncScope::OpenCLWorkGroup:
Name = "workgroup";
break;
case SyncScope::HIPAgent:
case SyncScope::OpenCLDevice:
Name = "agent";
break;
case SyncScope::HIPSystem:
case SyncScope::OpenCLAllSVMDevices:
Name = "";
break;
}
if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
if (!Name.empty())
Name = Twine(Twine(Name) + Twine("-")).str();
Name = Twine(Twine(Name) + Twine("one-as")).str();
}
return Ctx.getOrInsertSyncScopeID(Name);
}
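// Example mappings produced above (illustrative): an acquire atomic at
// SyncScope::HIPWorkgroup yields the sync scope name "workgroup-one-as", a
// sequentially consistent atomic at the same scope yields "workgroup", and a
// sequentially consistent SyncScope::HIPSystem atomic yields "" (the default
// system scope).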
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
void AMDGPUTargetCodeGenInfo::setCUDAKernelCallingConvention(
const FunctionType *&FT) const {
FT = getABIInfo().getContext().adjustFunctionType(
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
}
//===----------------------------------------------------------------------===//
// SPARC v8 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
//
// Ensures that complex values are passed in registers.
//
namespace {
class SparcV8ABIInfo : public DefaultABIInfo {
public:
SparcV8ABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
} // end anonymous namespace
ABIArgInfo
SparcV8ABIInfo::classifyReturnType(QualType Ty) const {
if (Ty->isAnyComplexType()) {
return ABIArgInfo::getDirect();
}
else {
return DefaultABIInfo::classifyReturnType(Ty);
}
}
void SparcV8ABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
namespace {
class SparcV8TargetCodeGenInfo : public TargetCodeGenInfo {
public:
SparcV8TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<SparcV8ABIInfo>(CGT)) {}
llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
int Offset;
if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
Offset = 12;
else
Offset = 8;
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, Offset));
}
llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
int Offset;
if (isAggregateTypeForABI(CGF.CurFnInfo->getReturnType()))
Offset = -12;
else
Offset = -8;
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, Offset));
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// SPARC v9 ABI Implementation.
// Based on the SPARC Compliance Definition version 2.4.1.
//
// Function arguments are mapped to a nominal "parameter array" and promoted to
// registers depending on their type. Each argument occupies 8 or 16 bytes in
// the array; structs larger than 16 bytes are passed indirectly.
//
// One case requires special care:
//
// struct mixed {
// int i;
// float f;
// };
//
// When a struct mixed is passed by value, it only occupies 8 bytes in the
// parameter array, but the int is passed in an integer register, and the float
// is passed in a floating point register. This is represented as two arguments
// with the LLVM IR inreg attribute:
//
// declare void f(i32 inreg %i, float inreg %f)
//
// The code generator will only allocate 4 bytes from the parameter array for
// the inreg arguments. All other arguments are allocated a multiple of 8
// bytes.
//
namespace {
class SparcV9ABIInfo : public ABIInfo {
public:
SparcV9ABIInfo(CodeGenTypes &CGT) : ABIInfo(CGT) {}
private:
ABIArgInfo classifyType(QualType RetTy, unsigned SizeLimit) const;
void computeInfo(CGFunctionInfo &FI) const override;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
// Coercion type builder for structs passed in registers. The coercion type
// serves two purposes:
//
// 1. Pad structs to a multiple of 64 bits, so they are passed 'left-aligned'
// in registers.
// 2. Expose aligned floating point elements as first-level elements, so the
// code generator knows to pass them in floating point registers.
//
// We also compute the InReg flag which indicates that the struct contains
// aligned 32-bit floats.
//
struct CoerceBuilder {
llvm::LLVMContext &Context;
const llvm::DataLayout &DL;
SmallVector<llvm::Type*, 8> Elems;
uint64_t Size;
bool InReg;
CoerceBuilder(llvm::LLVMContext &c, const llvm::DataLayout &dl)
: Context(c), DL(dl), Size(0), InReg(false) {}
// Pad Elems with integers until Size is ToSize.
void pad(uint64_t ToSize) {
assert(ToSize >= Size && "Cannot remove elements");
if (ToSize == Size)
return;
// Finish the current 64-bit word.
uint64_t Aligned = llvm::alignTo(Size, 64);
if (Aligned > Size && Aligned <= ToSize) {
Elems.push_back(llvm::IntegerType::get(Context, Aligned - Size));
Size = Aligned;
}
// Add whole 64-bit words.
while (Size + 64 <= ToSize) {
Elems.push_back(llvm::Type::getInt64Ty(Context));
Size += 64;
}
// Final in-word padding.
if (Size < ToSize) {
Elems.push_back(llvm::IntegerType::get(Context, ToSize - Size));
Size = ToSize;
}
}
// Add a floating point element at Offset.
void addFloat(uint64_t Offset, llvm::Type *Ty, unsigned Bits) {
// Unaligned floats are treated as integers.
if (Offset % Bits)
return;
// The InReg flag is only required if there are any floats < 64 bits.
if (Bits < 64)
InReg = true;
pad(Offset);
Elems.push_back(Ty);
Size = Offset + Bits;
}
// Add a struct type to the coercion type, starting at Offset (in bits).
void addStruct(uint64_t Offset, llvm::StructType *StrTy) {
const llvm::StructLayout *Layout = DL.getStructLayout(StrTy);
for (unsigned i = 0, e = StrTy->getNumElements(); i != e; ++i) {
llvm::Type *ElemTy = StrTy->getElementType(i);
uint64_t ElemOffset = Offset + Layout->getElementOffsetInBits(i);
switch (ElemTy->getTypeID()) {
case llvm::Type::StructTyID:
addStruct(ElemOffset, cast<llvm::StructType>(ElemTy));
break;
case llvm::Type::FloatTyID:
addFloat(ElemOffset, ElemTy, 32);
break;
case llvm::Type::DoubleTyID:
addFloat(ElemOffset, ElemTy, 64);
break;
case llvm::Type::FP128TyID:
addFloat(ElemOffset, ElemTy, 128);
break;
case llvm::Type::PointerTyID:
if (ElemOffset % 64 == 0) {
pad(ElemOffset);
Elems.push_back(ElemTy);
Size += 64;
}
break;
default:
break;
}
}
}
// Check if Ty is a usable substitute for the coercion type.
bool isUsableType(llvm::StructType *Ty) const {
return llvm::makeArrayRef(Elems) == Ty->elements();
}
// Get the coercion type as a literal struct type.
llvm::Type *getType() const {
if (Elems.size() == 1)
return Elems.front();
else
return llvm::StructType::get(Context, Elems);
}
};
};
} // end anonymous namespace
ABIArgInfo
SparcV9ABIInfo::classifyType(QualType Ty, unsigned SizeLimit) const {
if (Ty->isVoidType())
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
// Anything too big to fit in registers is passed with an explicit indirect
// pointer / sret pointer.
if (Size > SizeLimit)
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// Integer types smaller than a register are extended.
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() < 64)
return ABIArgInfo::getExtend(Ty);
// Other non-aggregates go in registers.
if (!isAggregateTypeForABI(Ty))
return ABIArgInfo::getDirect();
// If a C++ object has either a non-trivial copy constructor or a non-trivial
// destructor, it is passed with an explicit indirect pointer / sret pointer.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory);
// This is a small aggregate type that should be passed in registers.
// Build a coercion type from the LLVM struct type.
llvm::StructType *StrTy = dyn_cast<llvm::StructType>(CGT.ConvertType(Ty));
if (!StrTy)
return ABIArgInfo::getDirect();
CoerceBuilder CB(getVMContext(), getDataLayout());
CB.addStruct(0, StrTy);
CB.pad(llvm::alignTo(CB.DL.getTypeSizeInBits(StrTy), 64));
// Try to use the original type for coercion.
llvm::Type *CoerceTy = CB.isUsableType(StrTy) ? StrTy : CB.getType();
if (CB.InReg)
return ABIArgInfo::getDirectInReg(CoerceTy);
else
return ABIArgInfo::getDirect(CoerceTy);
}
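// Worked coercion example (an informal trace of CoerceBuilder above, not
// additional ABI logic): for 'struct { double d; int i; }' the double is added
// as a first-level element at offset 0, the int falls through the default
// case, and the final pad() fills the second 64-bit word with an i64, giving
// the coercion type { double, i64 } passed directly (InReg stays false because
// the only float member is 64 bits wide).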
Address SparcV9ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
ABIArgInfo AI = classifyType(Ty, 16 * 8);
llvm::Type *ArgTy = CGT.ConvertType(Ty);
if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
AI.setCoerceToType(ArgTy);
CharUnits SlotSize = CharUnits::fromQuantity(8);
CGBuilderTy &Builder = CGF.Builder;
Address Addr = Address(Builder.CreateLoad(VAListAddr, "ap.cur"),
getVAListElementType(CGF), SlotSize);
llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
auto TypeInfo = getContext().getTypeInfoInChars(Ty);
Address ArgAddr = Address::invalid();
CharUnits Stride;
switch (AI.getKind()) {
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Extend: {
Stride = SlotSize;
CharUnits Offset = SlotSize - TypeInfo.Width;
ArgAddr = Builder.CreateConstInBoundsByteGEP(Addr, Offset, "extend");
break;
}
case ABIArgInfo::Direct: {
auto AllocSize = getDataLayout().getTypeAllocSize(AI.getCoerceToType());
Stride = CharUnits::fromQuantity(AllocSize).alignTo(SlotSize);
ArgAddr = Addr;
break;
}
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased:
Stride = SlotSize;
ArgAddr = Builder.CreateElementBitCast(Addr, ArgPtrTy, "indirect");
ArgAddr = Address(Builder.CreateLoad(ArgAddr, "indirect.arg"), ArgTy,
TypeInfo.Align);
break;
case ABIArgInfo::Ignore:
return Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeInfo.Align);
}
// Update VAList.
Address NextPtr = Builder.CreateConstInBoundsByteGEP(Addr, Stride, "ap.next");
Builder.CreateStore(NextPtr.getPointer(), VAListAddr);
return Builder.CreateElementBitCast(ArgAddr, ArgTy, "arg.addr");
}
void SparcV9ABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyType(FI.getReturnType(), 32 * 8);
for (auto &I : FI.arguments())
I.info = classifyType(I.type, 16 * 8);
}
namespace {
class SparcV9TargetCodeGenInfo : public TargetCodeGenInfo {
public:
SparcV9TargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<SparcV9ABIInfo>(CGT)) {}
int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
return 14;
}
bool initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override;
llvm::Value *decodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, 8));
}
llvm::Value *encodeReturnAddress(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const override {
return CGF.Builder.CreateGEP(CGF.Int8Ty, Address,
llvm::ConstantInt::get(CGF.Int32Ty, -8));
}
};
} // end anonymous namespace
bool
SparcV9TargetCodeGenInfo::initDwarfEHRegSizeTable(CodeGen::CodeGenFunction &CGF,
llvm::Value *Address) const {
// This is calculated from the LLVM and GCC tables and verified
// against gcc output. AFAIK all ABIs use the same encoding.
CodeGen::CGBuilderTy &Builder = CGF.Builder;
llvm::IntegerType *i8 = CGF.Int8Ty;
llvm::Value *Four8 = llvm::ConstantInt::get(i8, 4);
llvm::Value *Eight8 = llvm::ConstantInt::get(i8, 8);
// 0-31: the 8-byte general-purpose registers
AssignToArrayRange(Builder, Address, Eight8, 0, 31);
// 32-63: f0-31, the 4-byte floating-point registers
AssignToArrayRange(Builder, Address, Four8, 32, 63);
// Y = 64
// PSR = 65
// WIM = 66
// TBR = 67
// PC = 68
// NPC = 69
// FSR = 70
// CSR = 71
AssignToArrayRange(Builder, Address, Eight8, 64, 71);
// 72-87: d0-15, the 8-byte floating-point registers
AssignToArrayRange(Builder, Address, Eight8, 72, 87);
return false;
}
// ARC ABI implementation.
namespace {
class ARCABIInfo : public DefaultABIInfo {
public:
using DefaultABIInfo::DefaultABIInfo;
private:
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
void updateState(const ABIArgInfo &Info, QualType Ty, CCState &State) const {
if (!State.FreeRegs)
return;
if (Info.isIndirect() && Info.getInReg())
State.FreeRegs--;
else if (Info.isDirect() && Info.getInReg()) {
unsigned sz = (getContext().getTypeSize(Ty) + 31) / 32;
if (sz < State.FreeRegs)
State.FreeRegs -= sz;
else
State.FreeRegs = 0;
}
}
void computeInfo(CGFunctionInfo &FI) const override {
CCState State(FI);
// ARC uses 8 registers to pass arguments.
State.FreeRegs = 8;
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
updateState(FI.getReturnInfo(), FI.getReturnType(), State);
for (auto &I : FI.arguments()) {
I.info = classifyArgumentType(I.type, State.FreeRegs);
updateState(I.info, I.type, State);
}
}
ABIArgInfo getIndirectByRef(QualType Ty, bool HasFreeRegs) const;
ABIArgInfo getIndirectByValue(QualType Ty) const;
ABIArgInfo classifyArgumentType(QualType Ty, uint8_t FreeRegs) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
};
class ARCTargetCodeGenInfo : public TargetCodeGenInfo {
public:
ARCTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<ARCABIInfo>(CGT)) {}
};
ABIArgInfo ARCABIInfo::getIndirectByRef(QualType Ty, bool HasFreeRegs) const {
return HasFreeRegs ? getNaturalAlignIndirectInReg(Ty) :
getNaturalAlignIndirect(Ty, false);
}
ABIArgInfo ARCABIInfo::getIndirectByValue(QualType Ty) const {
// Compute the byval alignment.
const unsigned MinABIStackAlignInBytes = 4;
unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true,
TypeAlign > MinABIStackAlignInBytes);
}
Address ARCABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*indirect*/ false,
getContext().getTypeInfoInChars(Ty),
CharUnits::fromQuantity(4), true);
}
ABIArgInfo ARCABIInfo::classifyArgumentType(QualType Ty,
uint8_t FreeRegs) const {
// Handle the generic C++ ABI.
const RecordType *RT = Ty->getAs<RecordType>();
if (RT) {
CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
if (RAA == CGCXXABI::RAA_Indirect)
return getIndirectByRef(Ty, FreeRegs > 0);
if (RAA == CGCXXABI::RAA_DirectInMemory)
return getIndirectByValue(Ty);
}
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
auto SizeInRegs = llvm::alignTo(getContext().getTypeSize(Ty), 32) / 32;
if (isAggregateTypeForABI(Ty)) {
// Structures with flexible arrays are always indirect.
if (RT && RT->getDecl()->hasFlexibleArrayMember())
return getIndirectByValue(Ty);
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
llvm::LLVMContext &LLVMContext = getVMContext();
llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext);
SmallVector<llvm::Type *, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
return FreeRegs >= SizeInRegs ?
ABIArgInfo::getDirectInReg(Result) :
ABIArgInfo::getDirect(Result, 0, nullptr, false);
}
if (const auto *EIT = Ty->getAs<BitIntType>())
if (EIT->getNumBits() > 64)
return getIndirectByValue(Ty);
return isPromotableIntegerTypeForABI(Ty)
? (FreeRegs >= SizeInRegs ? ABIArgInfo::getExtendInReg(Ty)
: ABIArgInfo::getExtend(Ty))
: (FreeRegs >= SizeInRegs ? ABIArgInfo::getDirectInReg()
: ABIArgInfo::getDirect());
}
ABIArgInfo ARCABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isAnyComplexType())
return ABIArgInfo::getDirectInReg();
// Arguments of size > 4 registers are indirect.
auto RetSize = llvm::alignTo(getContext().getTypeSize(RetTy), 32) / 32;
if (RetSize > 4)
return getIndirectByRef(RetTy, /*HasFreeRegs*/ true);
return DefaultABIInfo::classifyReturnType(RetTy);
}
} // End anonymous namespace.
//===----------------------------------------------------------------------===//
// XCore ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
/// A SmallStringEnc instance is used to build up the TypeString by passing
/// it by reference between functions that append to it.
typedef llvm::SmallString<128> SmallStringEnc;
/// TypeStringCache caches the meta encodings of Types.
///
/// The reason for caching TypeStrings is twofold:
/// 1. To cache a type's encoding for later uses;
/// 2. As a means to break recursive member type inclusion.
///
/// A cache Entry can have a Status of:
/// NonRecursive: The type encoding is not recursive;
/// Recursive: The type encoding is recursive;
/// Incomplete: An incomplete TypeString;
/// IncompleteUsed: An incomplete TypeString that has been used in a
/// Recursive type encoding.
///
/// A NonRecursive entry will have all of its sub-members expanded as fully
/// as possible. Whilst it may contain types which are recursive, the type
/// itself is not recursive and thus its encoding may be safely used whenever
/// the type is encountered.
///
/// A Recursive entry will have all of its sub-members expanded as fully as
/// possible. The type itself is recursive and it may contain other types which
/// are recursive. The Recursive encoding must not be used during the expansion
/// of a recursive type's recursive branch. For simplicity the code uses
/// IncompleteCount to reject all usage of Recursive encodings for member types.
///
/// An Incomplete entry is always a RecordType and only encodes its
/// identifier e.g. "s(S){}". Incomplete 'StubEnc' entries are ephemeral and
/// are placed into the cache during type expansion as a means to identify and
/// handle recursive inclusion of types as sub-members. If there is recursion
/// the entry becomes IncompleteUsed.
///
/// During the expansion of a RecordType's members:
///
/// If the cache contains a NonRecursive encoding for the member type, the
/// cached encoding is used;
///
/// If the cache contains a Recursive encoding for the member type, the
/// cached encoding is 'Swapped' out, as it may be incorrect, and...
///
/// If the member is a RecordType, an Incomplete encoding is placed into the
/// cache to break potential recursive inclusion of itself as a sub-member;
///
/// Once a member RecordType has been expanded, its temporary incomplete
/// entry is removed from the cache. If a Recursive encoding was swapped out
/// it is swapped back in;
///
/// If an incomplete entry is used to expand a sub-member, the incomplete
/// entry is marked as IncompleteUsed. The cache keeps count of how many
/// IncompleteUsed entries it currently contains in IncompleteUsedCount;
///
/// If a member's encoding is found to be a NonRecursive or Recursive viz:
/// IncompleteUsedCount==0, the member's encoding is added to the cache.
/// Else the member is part of a recursive type and thus the recursion has
/// been exited too soon for the encoding to be correct for the member.
///
class TypeStringCache {
enum Status {NonRecursive, Recursive, Incomplete, IncompleteUsed};
struct Entry {
std::string Str; // The encoded TypeString for the type.
enum Status State; // Information about the encoding in 'Str'.
std::string Swapped; // A temporary place holder for a Recursive encoding
// during the expansion of RecordType's members.
};
std::map<const IdentifierInfo *, struct Entry> Map;
unsigned IncompleteCount; // Number of Incomplete entries in the Map.
unsigned IncompleteUsedCount; // Number of IncompleteUsed entries in the Map.
public:
TypeStringCache() : IncompleteCount(0), IncompleteUsedCount(0) {}
void addIncomplete(const IdentifierInfo *ID, std::string StubEnc);
bool removeIncomplete(const IdentifierInfo *ID);
void addIfComplete(const IdentifierInfo *ID, StringRef Str,
bool IsRecursive);
StringRef lookupStr(const IdentifierInfo *ID);
};
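// Informal example of the recursion handling described above (the encoding
// shown is schematic, not exact TypeString syntax): while expanding the
// members of 'struct S { struct S *next; };' an Incomplete stub for S is
// placed in the cache; when the member pointer refers back to S the stub is
// used and marked IncompleteUsed, so the finished encoding of S is cached
// with Recursive status rather than NonRecursive.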
/// TypeString encodings for enum & union fields must be ordered.
/// FieldEncoding is a helper for this ordering process.
class FieldEncoding {
bool HasName;
std::string Enc;
public:
FieldEncoding(bool b, SmallStringEnc &e) : HasName(b), Enc(e.c_str()) {}
StringRef str() { return Enc; }
bool operator<(const FieldEncoding &rhs) const {
if (HasName != rhs.HasName) return HasName;
return Enc < rhs.Enc;
}
};
class XCoreABIInfo : public DefaultABIInfo {
public:
XCoreABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
class XCoreTargetCodeGenInfo : public TargetCodeGenInfo {
mutable TypeStringCache TSC;
void emitTargetMD(const Decl *D, llvm::GlobalValue *GV,
const CodeGen::CodeGenModule &M) const;
public:
XCoreTargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<XCoreABIInfo>(CGT)) {}
void emitTargetMetadata(CodeGen::CodeGenModule &CGM,
const llvm::MapVector<GlobalDecl, StringRef>
&MangledDeclNames) const override;
};
} // End anonymous namespace.
// TODO: this implementation is likely now redundant with the default
// EmitVAArg.
Address XCoreABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CGBuilderTy &Builder = CGF.Builder;
// Get the VAList.
CharUnits SlotSize = CharUnits::fromQuantity(4);
Address AP = Address(Builder.CreateLoad(VAListAddr),
getVAListElementType(CGF), SlotSize);
// Handle the argument.
ABIArgInfo AI = classifyArgumentType(Ty);
CharUnits TypeAlign = getContext().getTypeAlignInChars(Ty);
llvm::Type *ArgTy = CGT.ConvertType(Ty);
if (AI.canHaveCoerceToType() && !AI.getCoerceToType())
AI.setCoerceToType(ArgTy);
llvm::Type *ArgPtrTy = llvm::PointerType::getUnqual(ArgTy);
Address Val = Address::invalid();
CharUnits ArgSize = CharUnits::Zero();
switch (AI.getKind()) {
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
case ABIArgInfo::InAlloca:
llvm_unreachable("Unsupported ABI kind for va_arg");
case ABIArgInfo::Ignore:
Val = Address(llvm::UndefValue::get(ArgPtrTy), ArgTy, TypeAlign);
ArgSize = CharUnits::Zero();
break;
case ABIArgInfo::Extend:
case ABIArgInfo::Direct:
Val = Builder.CreateElementBitCast(AP, ArgTy);
ArgSize = CharUnits::fromQuantity(
getDataLayout().getTypeAllocSize(AI.getCoerceToType()));
ArgSize = ArgSize.alignTo(SlotSize);
break;
case ABIArgInfo::Indirect:
case ABIArgInfo::IndirectAliased:
Val = Builder.CreateElementBitCast(AP, ArgPtrTy);
Val = Address(Builder.CreateLoad(Val), ArgTy, TypeAlign);
ArgSize = SlotSize;
break;
}
// Increment the VAList.
if (!ArgSize.isZero()) {
Address APN = Builder.CreateConstInBoundsByteGEP(AP, ArgSize);
Builder.CreateStore(APN.getPointer(), VAListAddr);
}
return Val;
}
/// During the expansion of a RecordType, an incomplete TypeString is placed
/// into the cache as a means to identify and break recursion.
/// If there is a Recursive encoding in the cache, it is swapped out and will
/// be reinserted by removeIncomplete().
/// All other types of encoding should have been used rather than arriving here.
void TypeStringCache::addIncomplete(const IdentifierInfo *ID,
std::string StubEnc) {
if (!ID)
return;
Entry &E = Map[ID];
assert((E.Str.empty() || E.State == Recursive) &&
"Incorrect use of addIncomplete");
assert(!StubEnc.empty() && "Passing an empty string to addIncomplete()");
E.Swapped.swap(E.Str); // swap out the Recursive
E.Str.swap(StubEnc);
E.State = Incomplete;
++IncompleteCount;
}
/// Once the RecordType has been expanded, the temporary incomplete TypeString
/// must be removed from the cache.
/// If a Recursive was swapped out by addIncomplete(), it will be replaced.
/// Returns true if the RecordType was defined recursively.
bool TypeStringCache::removeIncomplete(const IdentifierInfo *ID) {
if (!ID)
return false;
auto I = Map.find(ID);
assert(I != Map.end() && "Entry not present");
Entry &E = I->second;
assert( (E.State == Incomplete ||
E.State == IncompleteUsed) &&
"Entry must be an incomplete type");
bool IsRecursive = false;
if (E.State == IncompleteUsed) {
// We made use of our Incomplete encoding, thus we are recursive.
IsRecursive = true;
--IncompleteUsedCount;
}
if (E.Swapped.empty())
Map.erase(I);
else {
// Swap the Recursive back.
E.Swapped.swap(E.Str);
E.Swapped.clear();
E.State = Recursive;
}
--IncompleteCount;
return IsRecursive;
}
/// Add the encoded TypeString to the cache only if it is NonRecursive or
/// Recursive (viz: all sub-members were expanded as fully as possible).
void TypeStringCache::addIfComplete(const IdentifierInfo *ID, StringRef Str,
bool IsRecursive) {
if (!ID || IncompleteUsedCount)
return; // No key, or it is an incomplete sub-type, so don't add.
Entry &E = Map[ID];
if (IsRecursive && !E.Str.empty()) {
assert(E.State==Recursive && E.Str.size() == Str.size() &&
"This is not the same Recursive entry");
// The parent container was not recursive after all, so we could have used
// this Recursive sub-member entry, but we assumed the worst when we
// started, viz: IncompleteCount != 0.
return;
}
assert(E.Str.empty() && "Entry already present");
E.Str = Str.str();
E.State = IsRecursive? Recursive : NonRecursive;
}
/// Return a cached TypeString encoding for the ID. If there isn't one, or we
/// are recursively expanding a type (IncompleteCount != 0) and the cached
/// encoding is Recursive, return an empty StringRef.
StringRef TypeStringCache::lookupStr(const IdentifierInfo *ID) {
if (!ID)
return StringRef(); // We have no key.
auto I = Map.find(ID);
if (I == Map.end())
return StringRef(); // We have no encoding.
Entry &E = I->second;
if (E.State == Recursive && IncompleteCount)
return StringRef(); // We don't use Recursive encodings for member types.
if (E.State == Incomplete) {
// The incomplete type is being used to break out of recursion.
E.State = IncompleteUsed;
++IncompleteUsedCount;
}
return E.Str;
}
/// The XCore ABI includes a type information section that communicates symbol
/// type information to the linker. The linker uses this information to verify
/// safety/correctness of things such as array bounds and pointers et al.
/// The ABI only requires C (and XC) language modules to emit TypeStrings.
/// This type information (TypeString) is emitted into meta data for all global
/// symbols: definitions, declarations, functions & variables.
///
/// The TypeString carries type, qualifier, name, size & value details.
/// Please see 'Tools Development Guide' section 2.16.2 for format details:
/// https://www.xmos.com/download/public/Tools-Development-Guide%28X9114A%29.pdf
/// The output is tested by test/CodeGen/xcore-stringtype.c.
///
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC);
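//
// Illustrative sketch (not normative; the function name is hypothetical and
// the encoding is derived from the appendType() helpers below): for a C
// function declared as
//   int f(float x);
// emitTargetMD() would add an operand to the "xcore.typestrings" named
// metadata pairing the llvm::Function with the MDString "f{si}(ft)".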
/// XCore uses emitTargetMD to emit TypeString metadata for global symbols.
void XCoreTargetCodeGenInfo::emitTargetMD(
const Decl *D, llvm::GlobalValue *GV,
const CodeGen::CodeGenModule &CGM) const {
SmallStringEnc Enc;
if (getTypeString(Enc, D, CGM, TSC)) {
llvm::LLVMContext &Ctx = CGM.getModule().getContext();
llvm::Metadata *MDVals[] = {llvm::ConstantAsMetadata::get(GV),
llvm::MDString::get(Ctx, Enc.str())};
llvm::NamedMDNode *MD =
CGM.getModule().getOrInsertNamedMetadata("xcore.typestrings");
MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
}
}
void XCoreTargetCodeGenInfo::emitTargetMetadata(
CodeGen::CodeGenModule &CGM,
const llvm::MapVector<GlobalDecl, StringRef> &MangledDeclNames) const {
// Warning, new MangledDeclNames may be appended within this loop.
// We rely on MapVector insertions adding new elements to the end
// of the container.
for (unsigned I = 0; I != MangledDeclNames.size(); ++I) {
auto Val = *(MangledDeclNames.begin() + I);
llvm::GlobalValue *GV = CGM.GetGlobalValue(Val.second);
if (GV) {
const Decl *D = Val.first.getDecl()->getMostRecentDecl();
emitTargetMD(D, GV, CGM);
}
}
}
//===----------------------------------------------------------------------===//
// Base ABI and target codegen info implementation common between SPIR and
// SPIR-V.
//===----------------------------------------------------------------------===//
namespace {
class CommonSPIRABIInfo : public DefaultABIInfo {
public:
CommonSPIRABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) { setCCs(); }
private:
void setCCs();
};
class SPIRVABIInfo : public CommonSPIRABIInfo {
public:
SPIRVABIInfo(CodeGenTypes &CGT) : CommonSPIRABIInfo(CGT) {}
void computeInfo(CGFunctionInfo &FI) const override;
private:
ABIArgInfo classifyKernelArgumentType(QualType Ty) const;
};
} // end anonymous namespace
namespace {
class CommonSPIRTargetCodeGenInfo : public TargetCodeGenInfo {
public:
CommonSPIRTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<CommonSPIRABIInfo>(CGT)) {}
CommonSPIRTargetCodeGenInfo(std::unique_ptr<ABIInfo> ABIInfo)
: TargetCodeGenInfo(std::move(ABIInfo)) {}
LangAS getASTAllocaAddressSpace() const override {
return getLangASFromTargetAS(
getABIInfo().getDataLayout().getAllocaAddrSpace());
}
unsigned getOpenCLKernelCallingConv() const override;
};
class SPIRVTargetCodeGenInfo : public CommonSPIRTargetCodeGenInfo {
public:
SPIRVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT)
: CommonSPIRTargetCodeGenInfo(std::make_unique<SPIRVABIInfo>(CGT)) {}
void setCUDAKernelCallingConvention(const FunctionType *&FT) const override;
};
} // End anonymous namespace.
void CommonSPIRABIInfo::setCCs() {
assert(getRuntimeCC() == llvm::CallingConv::C);
RuntimeCC = llvm::CallingConv::SPIR_FUNC;
}
ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
if (getContext().getLangOpts().CUDAIsDevice) {
// Coerce pointer arguments with default address space to CrossWorkGroup
// pointers for HIPSPV/CUDASPV. When the language mode is HIP/CUDA, the
// SPIRTargetInfo maps cuda_device to SPIR-V's CrossWorkGroup address space.
llvm::Type *LTy = CGT.ConvertType(Ty);
auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS);
return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
}
// Force copying aggregate type in kernel arguments by value when
// compiling CUDA targeting SPIR-V. This is required for the object
// copied to be valid on the device.
// This behavior follows the CUDA spec
// https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#global-function-argument-processing,
// and matches the NVPTX implementation.
if (isAggregateTypeForABI(Ty))
return getNaturalAlignIndirect(Ty, /* byval */ true);
}
return classifyArgumentType(Ty);
}
void SPIRVABIInfo::computeInfo(CGFunctionInfo &FI) const {
// The logic is the same as in DefaultABIInfo, with an exception for kernel
// argument handling.
llvm::CallingConv::ID CC = FI.getCallingConvention();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &I : FI.arguments()) {
if (CC == llvm::CallingConv::SPIR_KERNEL) {
I.info = classifyKernelArgumentType(I.type);
} else {
I.info = classifyArgumentType(I.type);
}
}
}
namespace clang {
namespace CodeGen {
void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI) {
if (CGM.getTarget().getTriple().isSPIRV())
SPIRVABIInfo(CGM.getTypes()).computeInfo(FI);
else
CommonSPIRABIInfo(CGM.getTypes()).computeInfo(FI);
}
}
}
unsigned CommonSPIRTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
return llvm::CallingConv::SPIR_KERNEL;
}
void SPIRVTargetCodeGenInfo::setCUDAKernelCallingConvention(
const FunctionType *&FT) const {
// Convert HIP kernels to SPIR-V kernels.
if (getABIInfo().getContext().getLangOpts().HIP) {
FT = getABIInfo().getContext().adjustFunctionType(
FT, FT->getExtInfo().withCallingConv(CC_OpenCLKernel));
return;
}
}
static bool appendType(SmallStringEnc &Enc, QualType QType,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC);
/// Helper function for appendRecordType().
/// Builds a SmallVector containing the encoded field types in declaration
/// order.
static bool extractFieldType(SmallVectorImpl<FieldEncoding> &FE,
const RecordDecl *RD,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
for (const auto *Field : RD->fields()) {
SmallStringEnc Enc;
Enc += "m(";
Enc += Field->getName();
Enc += "){";
if (Field->isBitField()) {
Enc += "b(";
llvm::raw_svector_ostream OS(Enc);
OS << Field->getBitWidthValue(CGM.getContext());
Enc += ':';
}
if (!appendType(Enc, Field->getType(), CGM, TSC))
return false;
if (Field->isBitField())
Enc += ')';
Enc += '}';
FE.emplace_back(!Field->getName().empty(), Enc);
}
return true;
}
/// Appends structure and union types to Enc and adds encoding to cache.
/// Recursively calls appendType (via extractFieldType) for each field.
/// Union types have their fields ordered according to the ABI.
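/// Illustrative example (derived from the encoding logic below, not from the
/// ABI document): a C struct
///   struct S { int a; float f; };
/// would be encoded as "s(S){m(a){si},m(f){ft}}".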
static bool appendRecordType(SmallStringEnc &Enc, const RecordType *RT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC, const IdentifierInfo *ID) {
// Append the cached TypeString if we have one.
StringRef TypeString = TSC.lookupStr(ID);
if (!TypeString.empty()) {
Enc += TypeString;
return true;
}
// Start to emit an incomplete TypeString.
size_t Start = Enc.size();
Enc += (RT->isUnionType()? 'u' : 's');
Enc += '(';
if (ID)
Enc += ID->getName();
Enc += "){";
// We collect all encoded fields and order as necessary.
bool IsRecursive = false;
const RecordDecl *RD = RT->getDecl()->getDefinition();
if (RD && !RD->field_empty()) {
// An incomplete TypeString stub is placed in the cache for this RecordType
// so that recursive calls to this RecordType will use it whilst building a
// complete TypeString for this RecordType.
SmallVector<FieldEncoding, 16> FE;
std::string StubEnc(Enc.substr(Start).str());
StubEnc += '}'; // StubEnc now holds a valid incomplete TypeString.
TSC.addIncomplete(ID, std::move(StubEnc));
if (!extractFieldType(FE, RD, CGM, TSC)) {
(void) TSC.removeIncomplete(ID);
return false;
}
IsRecursive = TSC.removeIncomplete(ID);
// The ABI requires unions to be sorted but not structures.
// See FieldEncoding::operator< for sort algorithm.
if (RT->isUnionType())
llvm::sort(FE);
// We can now complete the TypeString.
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
if (I)
Enc += ',';
Enc += FE[I].str();
}
}
Enc += '}';
TSC.addIfComplete(ID, Enc.substr(Start), IsRecursive);
return true;
}
/// Appends enum types to Enc and adds the encoding to the cache.
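/// Illustrative example (derived from the code below): an enum
///   enum E { A = 1, B = 2 };
/// would be encoded as "e(E){m(A){1},m(B){2}}".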
static bool appendEnumType(SmallStringEnc &Enc, const EnumType *ET,
TypeStringCache &TSC,
const IdentifierInfo *ID) {
// Append the cached TypeString if we have one.
StringRef TypeString = TSC.lookupStr(ID);
if (!TypeString.empty()) {
Enc += TypeString;
return true;
}
size_t Start = Enc.size();
Enc += "e(";
if (ID)
Enc += ID->getName();
Enc += "){";
// We collect all encoded enumerations and order them alphanumerically.
if (const EnumDecl *ED = ET->getDecl()->getDefinition()) {
SmallVector<FieldEncoding, 16> FE;
for (auto I = ED->enumerator_begin(), E = ED->enumerator_end(); I != E;
++I) {
SmallStringEnc EnumEnc;
EnumEnc += "m(";
EnumEnc += I->getName();
EnumEnc += "){";
I->getInitVal().toString(EnumEnc);
EnumEnc += '}';
FE.push_back(FieldEncoding(!I->getName().empty(), EnumEnc));
}
llvm::sort(FE);
unsigned E = FE.size();
for (unsigned I = 0; I != E; ++I) {
if (I)
Enc += ',';
Enc += FE[I].str();
}
}
Enc += '}';
TSC.addIfComplete(ID, Enc.substr(Start), false);
return true;
}
/// Appends type's qualifier to Enc.
/// This is done prior to appending the type's encoding.
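/// Illustrative example (derived from the table below): a "const volatile int"
/// is encoded as "cv:si"; an unqualified type contributes no prefix.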
static void appendQualifier(SmallStringEnc &Enc, QualType QT) {
// Qualifiers are emitted in alphabetical order.
static const char *const Table[]={"","c:","r:","cr:","v:","cv:","rv:","crv:"};
int Lookup = 0;
if (QT.isConstQualified())
Lookup += 1<<0;
if (QT.isRestrictQualified())
Lookup += 1<<1;
if (QT.isVolatileQualified())
Lookup += 1<<2;
Enc += Table[Lookup];
}
/// Appends built-in types to Enc.
static bool appendBuiltinType(SmallStringEnc &Enc, const BuiltinType *BT) {
const char *EncType;
switch (BT->getKind()) {
case BuiltinType::Void:
EncType = "0";
break;
case BuiltinType::Bool:
EncType = "b";
break;
case BuiltinType::Char_U:
EncType = "uc";
break;
case BuiltinType::UChar:
EncType = "uc";
break;
case BuiltinType::SChar:
EncType = "sc";
break;
case BuiltinType::UShort:
EncType = "us";
break;
case BuiltinType::Short:
EncType = "ss";
break;
case BuiltinType::UInt:
EncType = "ui";
break;
case BuiltinType::Int:
EncType = "si";
break;
case BuiltinType::ULong:
EncType = "ul";
break;
case BuiltinType::Long:
EncType = "sl";
break;
case BuiltinType::ULongLong:
EncType = "ull";
break;
case BuiltinType::LongLong:
EncType = "sll";
break;
case BuiltinType::Float:
EncType = "ft";
break;
case BuiltinType::Double:
EncType = "d";
break;
case BuiltinType::LongDouble:
EncType = "ld";
break;
default:
return false;
}
Enc += EncType;
return true;
}
/// Appends a pointer encoding to Enc before calling appendType for the pointee.
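/// Illustrative example: "int *" encodes as "p(si)" and "const int *" as
/// "p(c:si)" (the qualifier belongs to the pointee).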
static bool appendPointerType(SmallStringEnc &Enc, const PointerType *PT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
Enc += "p(";
if (!appendType(Enc, PT->getPointeeType(), CGM, TSC))
return false;
Enc += ')';
return true;
}
/// Appends array encoding to Enc before calling appendType for the element.
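/// Illustrative example: "int a[4]" encodes as "a(4:si)"; a global array of
/// unknown size gets "*" for the size, e.g. "a(*:si)" (see getTypeString()).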
static bool appendArrayType(SmallStringEnc &Enc, QualType QT,
const ArrayType *AT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC, StringRef NoSizeEnc) {
if (AT->getSizeModifier() != ArrayType::Normal)
return false;
Enc += "a(";
if (const ConstantArrayType *CAT = dyn_cast<ConstantArrayType>(AT))
CAT->getSize().toStringUnsigned(Enc);
else
Enc += NoSizeEnc; // Global arrays use "*", otherwise it is "".
Enc += ':';
// The Qualifiers should be attached to the type rather than the array.
appendQualifier(Enc, QT);
if (!appendType(Enc, AT->getElementType(), CGM, TSC))
return false;
Enc += ')';
return true;
}
/// Appends a function encoding to Enc, calling appendType for the return type
/// and the arguments.
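/// Illustrative example: "int f(float, unsigned)" encodes as "f{si}(ft,ui)",
/// a variadic "int g(int, ...)" as "f{si}(si,va)", and a prototyped function
/// taking no arguments as "f{si}(0)".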
static bool appendFunctionType(SmallStringEnc &Enc, const FunctionType *FT,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
Enc += "f{";
if (!appendType(Enc, FT->getReturnType(), CGM, TSC))
return false;
Enc += "}(";
if (const FunctionProtoType *FPT = FT->getAs<FunctionProtoType>()) {
// N.B. we are only interested in the adjusted param types.
auto I = FPT->param_type_begin();
auto E = FPT->param_type_end();
if (I != E) {
do {
if (!appendType(Enc, *I, CGM, TSC))
return false;
++I;
if (I != E)
Enc += ',';
} while (I != E);
if (FPT->isVariadic())
Enc += ",va";
} else {
if (FPT->isVariadic())
Enc += "va";
else
Enc += '0';
}
}
Enc += ')';
return true;
}
/// Handles the type's qualifier before dispatching a call to handle specific
/// type encodings.
static bool appendType(SmallStringEnc &Enc, QualType QType,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
QualType QT = QType.getCanonicalType();
if (const ArrayType *AT = QT->getAsArrayTypeUnsafe())
// The Qualifiers should be attached to the type rather than the array.
// Thus we don't call appendQualifier() here.
return appendArrayType(Enc, QT, AT, CGM, TSC, "");
appendQualifier(Enc, QT);
if (const BuiltinType *BT = QT->getAs<BuiltinType>())
return appendBuiltinType(Enc, BT);
if (const PointerType *PT = QT->getAs<PointerType>())
return appendPointerType(Enc, PT, CGM, TSC);
if (const EnumType *ET = QT->getAs<EnumType>())
return appendEnumType(Enc, ET, TSC, QT.getBaseTypeIdentifier());
if (const RecordType *RT = QT->getAsStructureType())
return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
if (const RecordType *RT = QT->getAsUnionType())
return appendRecordType(Enc, RT, CGM, TSC, QT.getBaseTypeIdentifier());
if (const FunctionType *FT = QT->getAs<FunctionType>())
return appendFunctionType(Enc, FT, CGM, TSC);
return false;
}
static bool getTypeString(SmallStringEnc &Enc, const Decl *D,
const CodeGen::CodeGenModule &CGM,
TypeStringCache &TSC) {
if (!D)
return false;
if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getLanguageLinkage() != CLanguageLinkage)
return false;
return appendType(Enc, FD->getType(), CGM, TSC);
}
if (const VarDecl *VD = dyn_cast<VarDecl>(D)) {
if (VD->getLanguageLinkage() != CLanguageLinkage)
return false;
QualType QT = VD->getType().getCanonicalType();
if (const ArrayType *AT = QT->getAsArrayTypeUnsafe()) {
// Global ArrayTypes are given a size of '*' if the size is unknown.
// The Qualifiers should be attached to the type rather than the array.
// Thus we don't call appendQualifier() here.
return appendArrayType(Enc, QT, AT, CGM, TSC, "*");
}
return appendType(Enc, QT, CGM, TSC);
}
return false;
}
//===----------------------------------------------------------------------===//
// RISCV ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class RISCVABIInfo : public DefaultABIInfo {
private:
// Size of the integer ('x') registers in bits.
unsigned XLen;
// Size of the floating point ('f') registers in bits. Note that the target
// ISA might have a wider FLen than the selected ABI (e.g. an RV32IF target
// with soft float ABI has FLen==0).
unsigned FLen;
static const int NumArgGPRs = 8;
static const int NumArgFPRs = 8;
bool detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
llvm::Type *&Field1Ty,
CharUnits &Field1Off,
llvm::Type *&Field2Ty,
CharUnits &Field2Off) const;
public:
RISCVABIInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen, unsigned FLen)
: DefaultABIInfo(CGT), XLen(XLen), FLen(FLen) {}
// DefaultABIInfo's classifyReturnType and classifyArgumentType are
// non-virtual, but computeInfo is virtual, so we override it.
void computeInfo(CGFunctionInfo &FI) const override;
ABIArgInfo classifyArgumentType(QualType Ty, bool IsFixed, int &ArgGPRsLeft,
int &ArgFPRsLeft) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
ABIArgInfo extendType(QualType Ty) const;
bool detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
CharUnits &Field1Off, llvm::Type *&Field2Ty,
CharUnits &Field2Off, int &NeededArgGPRs,
int &NeededArgFPRs) const;
ABIArgInfo coerceAndExpandFPCCEligibleStruct(llvm::Type *Field1Ty,
CharUnits Field1Off,
llvm::Type *Field2Ty,
CharUnits Field2Off) const;
};
} // end anonymous namespace
void RISCVABIInfo::computeInfo(CGFunctionInfo &FI) const {
QualType RetTy = FI.getReturnType();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(RetTy);
// IsRetIndirect is true if classifyArgumentType indicated the value should
// be passed indirect, or if the type is a scalar larger than 2*XLen and not a
// complex type with elements <= FLen. e.g. fp128 is passed direct in LLVM IR,
// relying on the backend lowering code to rewrite the argument list and pass
// it indirectly on RV32.
bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
if (!IsRetIndirect && RetTy->isScalarType() &&
getContext().getTypeSize(RetTy) > (2 * XLen)) {
if (RetTy->isComplexType() && FLen) {
QualType EltTy = RetTy->castAs<ComplexType>()->getElementType();
IsRetIndirect = getContext().getTypeSize(EltTy) > FLen;
} else {
// This is a normal scalar > 2*XLen, such as fp128 on RV32.
IsRetIndirect = true;
}
}
// We must track the number of GPRs used in order to conform to the RISC-V
// ABI, as integer scalars passed in registers should have signext/zeroext
// when promoted, but are anyext if passed on the stack. As GPR usage is
// different for variadic arguments, we must also track whether we are
// examining a vararg or not.
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
int NumFixedArgs = FI.getNumRequiredArgs();
int ArgNum = 0;
for (auto &ArgInfo : FI.arguments()) {
bool IsFixed = ArgNum < NumFixedArgs;
ArgInfo.info =
classifyArgumentType(ArgInfo.type, IsFixed, ArgGPRsLeft, ArgFPRsLeft);
ArgNum++;
}
}
// Returns true if the struct is a potential candidate for the floating point
// calling convention. If this function returns true, the caller is
// responsible for checking that if there is only a single field then that
// field is a float.
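// Illustrative example (assuming a hard-double ABI, i.e. FLen >= 64):
//   struct { double d; int i; };  // eligible: one FPR + one GPR
//   struct { int a; int b; };     // not eligible: int+int pairs are rejected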
bool RISCVABIInfo::detectFPCCEligibleStructHelper(QualType Ty, CharUnits CurOff,
llvm::Type *&Field1Ty,
CharUnits &Field1Off,
llvm::Type *&Field2Ty,
CharUnits &Field2Off) const {
bool IsInt = Ty->isIntegralOrEnumerationType();
bool IsFloat = Ty->isRealFloatingType();
if (IsInt || IsFloat) {
uint64_t Size = getContext().getTypeSize(Ty);
if (IsInt && Size > XLen)
return false;
// Can't be eligible if larger than the FP registers. Half precision isn't
// currently supported on RISC-V and the ABI hasn't been confirmed, so
// default to the integer ABI in that case.
if (IsFloat && (Size > FLen || Size < 32))
return false;
// Can't be eligible if an integer type was already found (int+int pairs
// are not eligible).
if (IsInt && Field1Ty && Field1Ty->isIntegerTy())
return false;
if (!Field1Ty) {
Field1Ty = CGT.ConvertType(Ty);
Field1Off = CurOff;
return true;
}
if (!Field2Ty) {
Field2Ty = CGT.ConvertType(Ty);
Field2Off = CurOff;
return true;
}
return false;
}
if (auto CTy = Ty->getAs<ComplexType>()) {
if (Field1Ty)
return false;
QualType EltTy = CTy->getElementType();
if (getContext().getTypeSize(EltTy) > FLen)
return false;
Field1Ty = CGT.ConvertType(EltTy);
Field1Off = CurOff;
Field2Ty = Field1Ty;
Field2Off = Field1Off + getContext().getTypeSizeInChars(EltTy);
return true;
}
if (const ConstantArrayType *ATy = getContext().getAsConstantArrayType(Ty)) {
uint64_t ArraySize = ATy->getSize().getZExtValue();
QualType EltTy = ATy->getElementType();
CharUnits EltSize = getContext().getTypeSizeInChars(EltTy);
for (uint64_t i = 0; i < ArraySize; ++i) {
bool Ret = detectFPCCEligibleStructHelper(EltTy, CurOff, Field1Ty,
Field1Off, Field2Ty, Field2Off);
if (!Ret)
return false;
CurOff += EltSize;
}
return true;
}
if (const auto *RTy = Ty->getAs<RecordType>()) {
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are not eligible for the FP calling convention.
if (getRecordArgABI(Ty, CGT.getCXXABI()))
return false;
if (isEmptyRecord(getContext(), Ty, true))
return true;
const RecordDecl *RD = RTy->getDecl();
// Unions aren't eligible unless they're empty (which is caught above).
if (RD->isUnion())
return false;
const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
// If this is a C++ record, check the bases first.
if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
for (const CXXBaseSpecifier &B : CXXRD->bases()) {
const auto *BDecl =
cast<CXXRecordDecl>(B.getType()->castAs<RecordType>()->getDecl());
CharUnits BaseOff = Layout.getBaseClassOffset(BDecl);
bool Ret = detectFPCCEligibleStructHelper(B.getType(), CurOff + BaseOff,
Field1Ty, Field1Off, Field2Ty,
Field2Off);
if (!Ret)
return false;
}
}
int ZeroWidthBitFieldCount = 0;
for (const FieldDecl *FD : RD->fields()) {
uint64_t FieldOffInBits = Layout.getFieldOffset(FD->getFieldIndex());
QualType QTy = FD->getType();
if (FD->isBitField()) {
unsigned BitWidth = FD->getBitWidthValue(getContext());
// Allow a bitfield with a type greater than XLen as long as the
// bitwidth is XLen or less.
if (getContext().getTypeSize(QTy) > XLen && BitWidth <= XLen)
QTy = getContext().getIntTypeForBitwidth(XLen, false);
if (BitWidth == 0) {
ZeroWidthBitFieldCount++;
continue;
}
}
bool Ret = detectFPCCEligibleStructHelper(
QTy, CurOff + getContext().toCharUnitsFromBits(FieldOffInBits),
Field1Ty, Field1Off, Field2Ty, Field2Off);
if (!Ret)
return false;
// As a quirk of the ABI, zero-width bitfields aren't ignored for fp+fp
// or int+fp structs, but are ignored for a struct with an fp field and
// any number of zero-width bitfields.
if (Field2Ty && ZeroWidthBitFieldCount > 0)
return false;
}
return Field1Ty != nullptr;
}
return false;
}
// Determine if a struct is eligible for passing according to the floating
// point calling convention (i.e., when flattened it contains a single fp
// value, fp+fp, or int+fp of appropriate size). If so, NeededArgFPRs and
// NeededArgGPRs are incremented appropriately.
bool RISCVABIInfo::detectFPCCEligibleStruct(QualType Ty, llvm::Type *&Field1Ty,
CharUnits &Field1Off,
llvm::Type *&Field2Ty,
CharUnits &Field2Off,
int &NeededArgGPRs,
int &NeededArgFPRs) const {
Field1Ty = nullptr;
Field2Ty = nullptr;
NeededArgGPRs = 0;
NeededArgFPRs = 0;
bool IsCandidate = detectFPCCEligibleStructHelper(
Ty, CharUnits::Zero(), Field1Ty, Field1Off, Field2Ty, Field2Off);
// Not really a candidate if we have a single int but no float.
if (Field1Ty && !Field2Ty && !Field1Ty->isFloatingPointTy())
return false;
if (!IsCandidate)
return false;
if (Field1Ty && Field1Ty->isFloatingPointTy())
NeededArgFPRs++;
else if (Field1Ty)
NeededArgGPRs++;
if (Field2Ty && Field2Ty->isFloatingPointTy())
NeededArgFPRs++;
else if (Field2Ty)
NeededArgGPRs++;
return true;
}
// Call getCoerceAndExpand for the two-element flattened struct described by
// Field1Ty, Field1Off, Field2Ty, Field2Off. This method will create an
// appropriate coerceToType and unpaddedCoerceToType.
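// Illustrative sketch (hypothetical field layout): for a flattened struct
// with Field1Ty = i8 at offset 0 and Field2Ty = float at offset 4, the
// coerceToType is { i8, float } and the two elements are then expanded into
// separate arguments (one GPR, one FPR).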
ABIArgInfo RISCVABIInfo::coerceAndExpandFPCCEligibleStruct(
llvm::Type *Field1Ty, CharUnits Field1Off, llvm::Type *Field2Ty,
CharUnits Field2Off) const {
SmallVector<llvm::Type *, 3> CoerceElts;
SmallVector<llvm::Type *, 2> UnpaddedCoerceElts;
if (!Field1Off.isZero())
CoerceElts.push_back(llvm::ArrayType::get(
llvm::Type::getInt8Ty(getVMContext()), Field1Off.getQuantity()));
CoerceElts.push_back(Field1Ty);
UnpaddedCoerceElts.push_back(Field1Ty);
if (!Field2Ty) {
return ABIArgInfo::getCoerceAndExpand(
llvm::StructType::get(getVMContext(), CoerceElts, !Field1Off.isZero()),
UnpaddedCoerceElts[0]);
}
CharUnits Field2Align =
CharUnits::fromQuantity(getDataLayout().getABITypeAlignment(Field2Ty));
CharUnits Field1End = Field1Off +
CharUnits::fromQuantity(getDataLayout().getTypeStoreSize(Field1Ty));
CharUnits Field2OffNoPadNoPack = Field1End.alignTo(Field2Align);
CharUnits Padding = CharUnits::Zero();
if (Field2Off > Field2OffNoPadNoPack)
Padding = Field2Off - Field2OffNoPadNoPack;
else if (Field2Off != Field2Align && Field2Off > Field1End)
Padding = Field2Off - Field1End;
bool IsPacked = !Field2Off.isMultipleOf(Field2Align);
if (!Padding.isZero())
CoerceElts.push_back(llvm::ArrayType::get(
llvm::Type::getInt8Ty(getVMContext()), Padding.getQuantity()));
CoerceElts.push_back(Field2Ty);
UnpaddedCoerceElts.push_back(Field2Ty);
auto CoerceToType =
llvm::StructType::get(getVMContext(), CoerceElts, IsPacked);
auto UnpaddedCoerceToType =
llvm::StructType::get(getVMContext(), UnpaddedCoerceElts, IsPacked);
return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
}
ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
int &ArgGPRsLeft,
int &ArgFPRsLeft) const {
assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always passed indirectly.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
if (ArgGPRsLeft)
ArgGPRsLeft -= 1;
return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
CGCXXABI::RAA_DirectInMemory);
}
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
uint64_t Size = getContext().getTypeSize(Ty);
// Pass floating point values via FPRs if possible.
if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
FLen >= Size && ArgFPRsLeft) {
ArgFPRsLeft--;
return ABIArgInfo::getDirect();
}
// Complex types for the hard float ABI must be passed direct rather than
// using CoerceAndExpand.
if (IsFixed && Ty->isComplexType() && FLen && ArgFPRsLeft >= 2) {
QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
if (getContext().getTypeSize(EltTy) <= FLen) {
ArgFPRsLeft -= 2;
return ABIArgInfo::getDirect();
}
}
if (IsFixed && FLen && Ty->isStructureOrClassType()) {
llvm::Type *Field1Ty = nullptr;
llvm::Type *Field2Ty = nullptr;
CharUnits Field1Off = CharUnits::Zero();
CharUnits Field2Off = CharUnits::Zero();
int NeededArgGPRs = 0;
int NeededArgFPRs = 0;
bool IsCandidate =
detectFPCCEligibleStruct(Ty, Field1Ty, Field1Off, Field2Ty, Field2Off,
NeededArgGPRs, NeededArgFPRs);
if (IsCandidate && NeededArgGPRs <= ArgGPRsLeft &&
NeededArgFPRs <= ArgFPRsLeft) {
ArgGPRsLeft -= NeededArgGPRs;
ArgFPRsLeft -= NeededArgFPRs;
return coerceAndExpandFPCCEligibleStruct(Field1Ty, Field1Off, Field2Ty,
Field2Off);
}
}
uint64_t NeededAlign = getContext().getTypeAlign(Ty);
bool MustUseStack = false;
// Determine the number of GPRs needed to pass the current argument
// according to the ABI. 2*XLen-aligned varargs are passed in "aligned"
// register pairs, so may consume 3 registers.
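// Worked example (illustrative): on RV32, a variadic "long long" has 2*XLen
// alignment, so if an odd number of argument GPRs remain, one register is
// skipped to form an aligned pair and NeededArgGPRs becomes 3; with an even
// number remaining it is 2.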
int NeededArgGPRs = 1;
if (!IsFixed && NeededAlign == 2 * XLen)
NeededArgGPRs = 2 + (ArgGPRsLeft % 2);
else if (Size > XLen && Size <= 2 * XLen)
NeededArgGPRs = 2;
if (NeededArgGPRs > ArgGPRsLeft) {
MustUseStack = true;
NeededArgGPRs = ArgGPRsLeft;
}
ArgGPRsLeft -= NeededArgGPRs;
if (!isAggregateTypeForABI(Ty) && !Ty->isVectorType()) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// All integral types are promoted to XLen width, unless passed on the
// stack.
if (Size < XLen && Ty->isIntegralOrEnumerationType() && !MustUseStack) {
return extendType(Ty);
}
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if (EIT->getNumBits() < XLen && !MustUseStack)
return extendType(Ty);
if (EIT->getNumBits() > 128 ||
(!getContext().getTargetInfo().hasInt128Type() &&
EIT->getNumBits() > 64))
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
return ABIArgInfo::getDirect();
}
// Aggregates which are <= 2*XLen will be passed in registers if possible,
// so coerce to integers.
if (Size <= 2 * XLen) {
unsigned Alignment = getContext().getTypeAlign(Ty);
// Use a single XLen int if possible, 2*XLen if 2*XLen alignment is
// required, and a 2-element XLen array if only XLen alignment is required.
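// Worked example (illustrative): on RV64 (XLen == 64), a 16-byte struct with
// 8-byte alignment (e.g. struct { long a; int b; }) becomes [2 x i64], while
// a 16-byte struct with 16-byte (2*XLen) alignment becomes a single i128.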
if (Size <= XLen) {
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), XLen));
} else if (Alignment == 2 * XLen) {
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), 2 * XLen));
} else {
return ABIArgInfo::getDirect(llvm::ArrayType::get(
llvm::IntegerType::get(getVMContext(), XLen), 2));
}
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
ABIArgInfo RISCVABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
int ArgGPRsLeft = 2;
int ArgFPRsLeft = FLen ? 2 : 0;
// The rules for return and argument types are the same, so defer to
// classifyArgumentType.
return classifyArgumentType(RetTy, /*IsFixed=*/true, ArgGPRsLeft,
ArgFPRsLeft);
}
Address RISCVABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
getVAListElementType(CGF), SlotSize);
Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
return Addr;
}
auto TInfo = getContext().getTypeInfoInChars(Ty);
// Arguments bigger than 2*XLen bits are passed indirectly.
bool IsIndirect = TInfo.Width > 2 * SlotSize;
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TInfo,
SlotSize, /*AllowHigherAlign=*/true);
}
ABIArgInfo RISCVABIInfo::extendType(QualType Ty) const {
int TySize = getContext().getTypeSize(Ty);
// RV64 ABI requires unsigned 32 bit integers to be sign extended.
if (XLen == 64 && Ty->isUnsignedIntegerOrEnumerationType() && TySize == 32)
return ABIArgInfo::getSignExtend(Ty);
return ABIArgInfo::getExtend(Ty);
}
namespace {
class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
public:
RISCVTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned XLen,
unsigned FLen)
: TargetCodeGenInfo(std::make_unique<RISCVABIInfo>(CGT, XLen, FLen)) {}
void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override {
const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD) return;
const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
if (!Attr)
return;
const char *Kind;
switch (Attr->getInterrupt()) {
case RISCVInterruptAttr::user: Kind = "user"; break;
case RISCVInterruptAttr::supervisor: Kind = "supervisor"; break;
case RISCVInterruptAttr::machine: Kind = "machine"; break;
}
auto *Fn = cast<llvm::Function>(GV);
Fn->addFnAttr("interrupt", Kind);
}
};
} // namespace
//===----------------------------------------------------------------------===//
// VE ABI Implementation.
//
namespace {
class VEABIInfo : public DefaultABIInfo {
public:
VEABIInfo(CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
private:
ABIArgInfo classifyReturnType(QualType RetTy) const;
ABIArgInfo classifyArgumentType(QualType RetTy) const;
void computeInfo(CGFunctionInfo &FI) const override;
};
} // end anonymous namespace
ABIArgInfo VEABIInfo::classifyReturnType(QualType Ty) const {
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
uint64_t Size = getContext().getTypeSize(Ty);
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
return DefaultABIInfo::classifyReturnType(Ty);
}
ABIArgInfo VEABIInfo::classifyArgumentType(QualType Ty) const {
if (Ty->isAnyComplexType())
return ABIArgInfo::getDirect();
uint64_t Size = getContext().getTypeSize(Ty);
if (Size < 64 && Ty->isIntegerType())
return ABIArgInfo::getExtend(Ty);
return DefaultABIInfo::classifyArgumentType(Ty);
}
void VEABIInfo::computeInfo(CGFunctionInfo &FI) const {
FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
for (auto &Arg : FI.arguments())
Arg.info = classifyArgumentType(Arg.type);
}
namespace {
class VETargetCodeGenInfo : public TargetCodeGenInfo {
public:
VETargetCodeGenInfo(CodeGenTypes &CGT)
: TargetCodeGenInfo(std::make_unique<VEABIInfo>(CGT)) {}
// The VE ABI requires that arguments of variadic and prototype-less functions
// be passed in both registers and memory.
bool isNoProtoCallVariadic(const CallArgList &args,
const FunctionNoProtoType *fnType) const override {
return true;
}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// CSKY ABI Implementation
//===----------------------------------------------------------------------===//
namespace {
class CSKYABIInfo : public DefaultABIInfo {
static const int NumArgGPRs = 4;
static const int NumArgFPRs = 4;
static const unsigned XLen = 32;
unsigned FLen;
public:
CSKYABIInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
: DefaultABIInfo(CGT), FLen(FLen) {}
void computeInfo(CGFunctionInfo &FI) const override;
ABIArgInfo classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
int &ArgFPRsLeft,
bool isReturnType = false) const;
ABIArgInfo classifyReturnType(QualType RetTy) const;
Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const override;
};
} // end anonymous namespace
void CSKYABIInfo::computeInfo(CGFunctionInfo &FI) const {
QualType RetTy = FI.getReturnType();
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(RetTy);
bool IsRetIndirect = FI.getReturnInfo().getKind() == ABIArgInfo::Indirect;
// We must track the number of GPRs used in order to conform to the CSKY
// ABI, as integer scalars passed in registers should have signext/zeroext
// when promoted.
int ArgGPRsLeft = IsRetIndirect ? NumArgGPRs - 1 : NumArgGPRs;
int ArgFPRsLeft = FLen ? NumArgFPRs : 0;
for (auto &ArgInfo : FI.arguments()) {
ArgInfo.info = classifyArgumentType(ArgInfo.type, ArgGPRsLeft, ArgFPRsLeft);
}
}
Address CSKYABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty) const {
CharUnits SlotSize = CharUnits::fromQuantity(XLen / 8);
// Empty records are ignored for parameter passing purposes.
if (isEmptyRecord(getContext(), Ty, true)) {
Address Addr = Address(CGF.Builder.CreateLoad(VAListAddr),
getVAListElementType(CGF), SlotSize);
Addr = CGF.Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(Ty));
return Addr;
}
auto TInfo = getContext().getTypeInfoInChars(Ty);
return emitVoidPtrVAArg(CGF, VAListAddr, Ty, false, TInfo, SlotSize,
/*AllowHigherAlign=*/true);
}
ABIArgInfo CSKYABIInfo::classifyArgumentType(QualType Ty, int &ArgGPRsLeft,
int &ArgFPRsLeft,
bool isReturnType) const {
assert(ArgGPRsLeft <= NumArgGPRs && "Arg GPR tracking underflow");
Ty = useFirstFieldIfTransparentUnion(Ty);
// Structures with either a non-trivial destructor or a non-trivial
// copy constructor are always passed indirectly.
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
if (ArgGPRsLeft)
ArgGPRsLeft -= 1;
return getNaturalAlignIndirect(Ty, /*ByVal=*/RAA ==
CGCXXABI::RAA_DirectInMemory);
}
// Ignore empty structs/unions.
if (isEmptyRecord(getContext(), Ty, true))
return ABIArgInfo::getIgnore();
if (!Ty->getAsUnionType())
if (const Type *SeltTy = isSingleElementStruct(Ty, getContext()))
return ABIArgInfo::getDirect(CGT.ConvertType(QualType(SeltTy, 0)));
uint64_t Size = getContext().getTypeSize(Ty);
// Pass floating point values via FPRs if possible.
if (Ty->isFloatingType() && !Ty->isComplexType() && FLen >= Size &&
ArgFPRsLeft) {
ArgFPRsLeft--;
return ABIArgInfo::getDirect();
}
// Complex types for the hard float ABI must be passed direct rather than
// using CoerceAndExpand.
if (Ty->isComplexType() && FLen && !isReturnType) {
QualType EltTy = Ty->castAs<ComplexType>()->getElementType();
if (getContext().getTypeSize(EltTy) <= FLen) {
ArgFPRsLeft -= 2;
return ABIArgInfo::getDirect();
}
}
if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
// All integral types are promoted to XLen width, unless passed on the
// stack.
if (Size < XLen && Ty->isIntegralOrEnumerationType())
return ABIArgInfo::getExtend(Ty);
if (const auto *EIT = Ty->getAs<BitIntType>()) {
if (EIT->getNumBits() < XLen)
return ABIArgInfo::getExtend(Ty);
}
return ABIArgInfo::getDirect();
}
// For an argument type, the first 4*XLen bits of an aggregate are passed in
// registers and the rest is passed on the stack, so we can coerce to integers
// directly and let the backend handle it correctly.
// For a return type, an aggregate of <= 2*XLen is returned in registers;
// otherwise it is returned indirectly.
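// Illustrative example: on CSKY (XLen == 32) a 12-byte struct argument is
// coerced to [3 x i32], and a 4-byte struct to a single i32.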
if (!isReturnType || Size <= 2 * XLen) {
if (Size <= XLen) {
return ABIArgInfo::getDirect(
llvm::IntegerType::get(getVMContext(), XLen));
} else {
return ABIArgInfo::getDirect(llvm::ArrayType::get(
llvm::IntegerType::get(getVMContext(), XLen), (Size + 31) / XLen));
}
}
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
}
ABIArgInfo CSKYABIInfo::classifyReturnType(QualType RetTy) const {
if (RetTy->isVoidType())
return ABIArgInfo::getIgnore();
int ArgGPRsLeft = 2;
int ArgFPRsLeft = FLen ? 1 : 0;
// The rules for return and argument types are the same, so defer to
// classifyArgumentType.
return classifyArgumentType(RetTy, ArgGPRsLeft, ArgFPRsLeft, true);
}
namespace {
class CSKYTargetCodeGenInfo : public TargetCodeGenInfo {
public:
CSKYTargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, unsigned FLen)
: TargetCodeGenInfo(std::make_unique<CSKYABIInfo>(CGT, FLen)) {}
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Driver code
//===----------------------------------------------------------------------===//
bool CodeGenModule::supportsCOMDAT() const {
return getTriple().supportsCOMDAT();
}
const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() {
if (TheTargetCodeGenInfo)
return *TheTargetCodeGenInfo;
// Helper to set the unique_ptr while still keeping the return value.
auto SetCGInfo = [&](TargetCodeGenInfo *P) -> const TargetCodeGenInfo & {
this->TheTargetCodeGenInfo.reset(P);
return *P;
};
const llvm::Triple &Triple = getTarget().getTriple();
switch (Triple.getArch()) {
default:
return SetCGInfo(new DefaultTargetCodeGenInfo(Types));
case llvm::Triple::le32:
return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
case llvm::Triple::m68k:
return SetCGInfo(new M68kTargetCodeGenInfo(Types));
case llvm::Triple::mips:
case llvm::Triple::mipsel:
if (Triple.getOS() == llvm::Triple::NaCl)
return SetCGInfo(new PNaClTargetCodeGenInfo(Types));
return SetCGInfo(new MIPSTargetCodeGenInfo(Types, true));
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
return SetCGInfo(new MIPSTargetCodeGenInfo(Types, false));
case llvm::Triple::avr: {
// For passing parameters, R8~R25 are used on avr, and R18~R25 are used
// on avrtiny. For passing return value, R18~R25 are used on avr, and
// R22~R25 are used on avrtiny.
unsigned NPR = getTarget().getABI() == "avrtiny" ? 6 : 18;
unsigned NRR = getTarget().getABI() == "avrtiny" ? 4 : 8;
return SetCGInfo(new AVRTargetCodeGenInfo(Types, NPR, NRR));
}
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_32:
case llvm::Triple::aarch64_be: {
AArch64ABIInfo::ABIKind Kind = AArch64ABIInfo::AAPCS;
if (getTarget().getABI() == "darwinpcs")
Kind = AArch64ABIInfo::DarwinPCS;
else if (Triple.isOSWindows())
return SetCGInfo(
new WindowsAArch64TargetCodeGenInfo(Types, AArch64ABIInfo::Win64));
return SetCGInfo(new AArch64TargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::wasm32:
case llvm::Triple::wasm64: {
WebAssemblyABIInfo::ABIKind Kind = WebAssemblyABIInfo::MVP;
if (getTarget().getABI() == "experimental-mv")
Kind = WebAssemblyABIInfo::ExperimentalMV;
return SetCGInfo(new WebAssemblyTargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
if (Triple.getOS() == llvm::Triple::Win32) {
return SetCGInfo(
new WindowsARMTargetCodeGenInfo(Types, ARMABIInfo::AAPCS_VFP));
}
ARMABIInfo::ABIKind Kind = ARMABIInfo::AAPCS;
StringRef ABIStr = getTarget().getABI();
if (ABIStr == "apcs-gnu")
Kind = ARMABIInfo::APCS;
else if (ABIStr == "aapcs16")
Kind = ARMABIInfo::AAPCS16_VFP;
else if (CodeGenOpts.FloatABI == "hard" ||
(CodeGenOpts.FloatABI != "soft" &&
(Triple.getEnvironment() == llvm::Triple::GNUEABIHF ||
Triple.getEnvironment() == llvm::Triple::MuslEABIHF ||
Triple.getEnvironment() == llvm::Triple::EABIHF)))
Kind = ARMABIInfo::AAPCS_VFP;
return SetCGInfo(new ARMTargetCodeGenInfo(Types, Kind));
}
case llvm::Triple::ppc: {
if (Triple.isOSAIX())
return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ false));
bool IsSoftFloat =
CodeGenOpts.FloatABI == "soft" || getTarget().hasFeature("spe");
bool RetSmallStructInRegABI =
PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
return SetCGInfo(
new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
}
case llvm::Triple::ppcle: {
bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
bool RetSmallStructInRegABI =
PPC32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
return SetCGInfo(
new PPC32TargetCodeGenInfo(Types, IsSoftFloat, RetSmallStructInRegABI));
}
case llvm::Triple::ppc64:
if (Triple.isOSAIX())
return SetCGInfo(new AIXTargetCodeGenInfo(Types, /*Is64Bit*/ true));
if (Triple.isOSBinFormatELF()) {
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
if (getTarget().getABI() == "elfv2")
Kind = PPC64_SVR4_ABIInfo::ELFv2;
bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
return SetCGInfo(
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat));
}
return SetCGInfo(new PPC64TargetCodeGenInfo(Types));
case llvm::Triple::ppc64le: {
assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
if (getTarget().getABI() == "elfv1")
Kind = PPC64_SVR4_ABIInfo::ELFv1;
bool IsSoftFloat = CodeGenOpts.FloatABI == "soft";
return SetCGInfo(
new PPC64_SVR4_TargetCodeGenInfo(Types, Kind, IsSoftFloat));
}
case llvm::Triple::nvptx:
case llvm::Triple::nvptx64:
return SetCGInfo(new NVPTXTargetCodeGenInfo(Types));
case llvm::Triple::msp430:
return SetCGInfo(new MSP430TargetCodeGenInfo(Types));
case llvm::Triple::riscv32:
case llvm::Triple::riscv64: {
StringRef ABIStr = getTarget().getABI();
unsigned XLen = getTarget().getPointerWidth(0);
unsigned ABIFLen = 0;
if (ABIStr.endswith("f"))
ABIFLen = 32;
else if (ABIStr.endswith("d"))
ABIFLen = 64;
return SetCGInfo(new RISCVTargetCodeGenInfo(Types, XLen, ABIFLen));
}
case llvm::Triple::systemz: {
bool SoftFloat = CodeGenOpts.FloatABI == "soft";
bool HasVector = !SoftFloat && getTarget().getABI() == "vector";
return SetCGInfo(new SystemZTargetCodeGenInfo(Types, HasVector, SoftFloat));
}
case llvm::Triple::tce:
case llvm::Triple::tcele:
return SetCGInfo(new TCETargetCodeGenInfo(Types));
case llvm::Triple::x86: {
bool IsDarwinVectorABI = Triple.isOSDarwin();
bool RetSmallStructInRegABI =
X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts);
bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing();
if (Triple.getOS() == llvm::Triple::Win32) {
return SetCGInfo(new WinX86_32TargetCodeGenInfo(
Types, IsDarwinVectorABI, RetSmallStructInRegABI,
IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters));
} else {
return SetCGInfo(new X86_32TargetCodeGenInfo(
Types, IsDarwinVectorABI, RetSmallStructInRegABI,
IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters,
CodeGenOpts.FloatABI == "soft"));
}
}
case llvm::Triple::x86_64: {
StringRef ABI = getTarget().getABI();
X86AVXABILevel AVXLevel =
(ABI == "avx512"
? X86AVXABILevel::AVX512
: ABI == "avx" ? X86AVXABILevel::AVX : X86AVXABILevel::None);
switch (Triple.getOS()) {
case llvm::Triple::Win32:
return SetCGInfo(new WinX86_64TargetCodeGenInfo(Types, AVXLevel));
default:
return SetCGInfo(new X86_64TargetCodeGenInfo(Types, AVXLevel));
}
}
case llvm::Triple::hexagon:
return SetCGInfo(new HexagonTargetCodeGenInfo(Types));
case llvm::Triple::lanai:
return SetCGInfo(new LanaiTargetCodeGenInfo(Types));
case llvm::Triple::r600:
return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
case llvm::Triple::amdgcn:
return SetCGInfo(new AMDGPUTargetCodeGenInfo(Types));
case llvm::Triple::sparc:
return SetCGInfo(new SparcV8TargetCodeGenInfo(Types));
case llvm::Triple::sparcv9:
return SetCGInfo(new SparcV9TargetCodeGenInfo(Types));
case llvm::Triple::xcore:
return SetCGInfo(new XCoreTargetCodeGenInfo(Types));
case llvm::Triple::arc:
return SetCGInfo(new ARCTargetCodeGenInfo(Types));
case llvm::Triple::spir:
case llvm::Triple::spir64:
return SetCGInfo(new CommonSPIRTargetCodeGenInfo(Types));
case llvm::Triple::spirv32:
case llvm::Triple::spirv64:
return SetCGInfo(new SPIRVTargetCodeGenInfo(Types));
case llvm::Triple::ve:
return SetCGInfo(new VETargetCodeGenInfo(Types));
case llvm::Triple::csky: {
bool IsSoftFloat = !getTarget().hasFeature("hard-float-abi");
bool hasFP64 = getTarget().hasFeature("fpuv2_df") ||
getTarget().hasFeature("fpuv3_df");
return SetCGInfo(new CSKYTargetCodeGenInfo(Types, IsSoftFloat ? 0
: hasFP64 ? 64
: 32));
}
}
}
/// Create an OpenCL kernel for an enqueued block.
///
/// The kernel has the same function type as the block invoke function. Its
/// name is the name of the block invoke function postfixed with "_kernel".
/// It simply calls the block invoke function then returns.
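///
/// Illustrative sketch (hypothetical name): for a block invoke function named
/// "__foo_block_invoke", this emits a kernel "__foo_block_invoke_kernel" with
/// the same parameter types that forwards its arguments to the invoke
/// function and returns void.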
llvm::Function *
TargetCodeGenInfo::createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *Invoke,
llvm::Type *BlockTy) const {
auto *InvokeFT = Invoke->getFunctionType();
auto &C = CGF.getLLVMContext();
std::string Name = Invoke->getName().str() + "_kernel";
auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C),
InvokeFT->params(), false);
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage, Name,
&CGF.CGM.getModule());
auto IP = CGF.Builder.saveIP();
auto *BB = llvm::BasicBlock::Create(C, "entry", F);
auto &Builder = CGF.Builder;
Builder.SetInsertPoint(BB);
llvm::SmallVector<llvm::Value *, 2> Args(llvm::make_pointer_range(F->args()));
llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
call->setCallingConv(Invoke->getCallingConv());
Builder.CreateRetVoid();
Builder.restoreIP(IP);
return F;
}
/// Create an OpenCL kernel for an enqueued block.
///
/// The type of the first argument (the block literal) is the struct type
/// of the block literal instead of a pointer type. The first argument
/// (block literal) is passed directly by value to the kernel. The kernel
/// allocates the same type of struct on stack and stores the block literal
/// to it and passes its pointer to the block invoke function. The kernel
/// has "enqueued-block" function attribute and kernel argument metadata.
llvm::Function *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
CodeGenFunction &CGF, llvm::Function *Invoke,
llvm::Type *BlockTy) const {
auto &Builder = CGF.Builder;
auto &C = CGF.getLLVMContext();
auto *InvokeFT = Invoke->getFunctionType();
llvm::SmallVector<llvm::Type *, 2> ArgTys;
llvm::SmallVector<llvm::Metadata *, 8> AddressQuals;
llvm::SmallVector<llvm::Metadata *, 8> AccessQuals;
llvm::SmallVector<llvm::Metadata *, 8> ArgTypeNames;
llvm::SmallVector<llvm::Metadata *, 8> ArgBaseTypeNames;
llvm::SmallVector<llvm::Metadata *, 8> ArgTypeQuals;
llvm::SmallVector<llvm::Metadata *, 8> ArgNames;
ArgTys.push_back(BlockTy);
ArgTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(0)));
ArgBaseTypeNames.push_back(llvm::MDString::get(C, "__block_literal"));
ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
AccessQuals.push_back(llvm::MDString::get(C, "none"));
ArgNames.push_back(llvm::MDString::get(C, "block_literal"));
for (unsigned I = 1, E = InvokeFT->getNumParams(); I < E; ++I) {
ArgTys.push_back(InvokeFT->getParamType(I));
ArgTypeNames.push_back(llvm::MDString::get(C, "void*"));
AddressQuals.push_back(llvm::ConstantAsMetadata::get(Builder.getInt32(3)));
AccessQuals.push_back(llvm::MDString::get(C, "none"));
ArgBaseTypeNames.push_back(llvm::MDString::get(C, "void*"));
ArgTypeQuals.push_back(llvm::MDString::get(C, ""));
ArgNames.push_back(
llvm::MDString::get(C, (Twine("local_arg") + Twine(I)).str()));
}
std::string Name = Invoke->getName().str() + "_kernel";
auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(C), ArgTys, false);
auto *F = llvm::Function::Create(FT, llvm::GlobalValue::InternalLinkage, Name,
&CGF.CGM.getModule());
F->addFnAttr("enqueued-block");
auto IP = CGF.Builder.saveIP();
auto *BB = llvm::BasicBlock::Create(C, "entry", F);
Builder.SetInsertPoint(BB);
const auto BlockAlign = CGF.CGM.getDataLayout().getPrefTypeAlign(BlockTy);
auto *BlockPtr = Builder.CreateAlloca(BlockTy, nullptr);
BlockPtr->setAlignment(BlockAlign);
Builder.CreateAlignedStore(F->arg_begin(), BlockPtr, BlockAlign);
auto *Cast = Builder.CreatePointerCast(BlockPtr, InvokeFT->getParamType(0));
llvm::SmallVector<llvm::Value *, 2> Args;
Args.push_back(Cast);
for (auto I = F->arg_begin() + 1, E = F->arg_end(); I != E; ++I)
Args.push_back(I);
llvm::CallInst *call = Builder.CreateCall(Invoke, Args);
call->setCallingConv(Invoke->getCallingConv());
Builder.CreateRetVoid();
Builder.restoreIP(IP);
F->setMetadata("kernel_arg_addr_space", llvm::MDNode::get(C, AddressQuals));
F->setMetadata("kernel_arg_access_qual", llvm::MDNode::get(C, AccessQuals));
F->setMetadata("kernel_arg_type", llvm::MDNode::get(C, ArgTypeNames));
F->setMetadata("kernel_arg_base_type",
llvm::MDNode::get(C, ArgBaseTypeNames));
F->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(C, ArgTypeQuals));
if (CGF.CGM.getCodeGenOpts().EmitOpenCLArgMetadata)
F->setMetadata("kernel_arg_name", llvm::MDNode::get(C, ArgNames));
return F;
}
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index f203cae1d329..665cdc3132fb 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -1,3211 +1,3201 @@
//===--- Gnu.cpp - Gnu Tool and ToolChain Implementations -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Gnu.h"
#include "Arch/ARM.h"
#include "Arch/CSKY.h"
#include "Arch/Mips.h"
#include "Arch/PPC.h"
#include "Arch/RISCV.h"
#include "Arch/Sparc.h"
#include "Arch/SystemZ.h"
#include "CommonArgs.h"
#include "Linux.h"
#include "clang/Config/config.h" // for GCC_INSTALL_PREFIX
#include "clang/Driver/Compilation.h"
#include "clang/Driver/Driver.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "clang/Driver/Options.h"
#include "clang/Driver/Tool.h"
#include "clang/Driver/ToolChain.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <system_error>
using namespace clang::driver;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;
using tools::addMultilibFlag;
using tools::addPathIfExists;
static bool forwardToGCC(const Option &O) {
// LinkerInput options have been forwarded. Don't duplicate.
if (O.hasFlag(options::LinkerInput))
return false;
return O.matches(options::OPT_Link_Group) || O.hasFlag(options::LinkOption);
}
// Switch CPU names not recognized by the GNU assembler to a close CPU that it
// does recognize, so that a lower -march is not picked in the absence of a
// -mcpu flag.
static void normalizeCPUNamesForAssembler(const ArgList &Args,
ArgStringList &CmdArgs) {
if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) {
StringRef CPUArg(A->getValue());
if (CPUArg.equals_insensitive("krait"))
CmdArgs.push_back("-mcpu=cortex-a15");
else if (CPUArg.equals_insensitive("kryo"))
CmdArgs.push_back("-mcpu=cortex-a57");
else
Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ);
}
}
void tools::gcc::Common::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
ArgStringList CmdArgs;
for (const auto &A : Args) {
if (forwardToGCC(A->getOption())) {
// It is unfortunate that we have to claim here, as this means
// we will basically never report anything interesting for
// platforms using a generic gcc, even if we are just using gcc
// to get to the assembler.
A->claim();
A->render(Args, CmdArgs);
}
}
RenderExtraToolArgs(JA, CmdArgs);
// If using a driver driver, force the arch.
if (getToolChain().getTriple().isOSDarwin()) {
CmdArgs.push_back("-arch");
CmdArgs.push_back(
Args.MakeArgString(getToolChain().getDefaultUniversalArchName()));
}
// Try to force gcc to match the tool chain we want, if we recognize
// the arch.
//
// FIXME: The triple class should directly provide the information we want
// here.
switch (getToolChain().getArch()) {
default:
break;
case llvm::Triple::x86:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
CmdArgs.push_back("-m32");
break;
case llvm::Triple::x86_64:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
CmdArgs.push_back("-m64");
break;
case llvm::Triple::sparcel:
CmdArgs.push_back("-EL");
break;
}
if (Output.isFilename()) {
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
} else {
assert(Output.isNothing() && "Unexpected output");
CmdArgs.push_back("-fsyntax-only");
}
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
// Only pass -x if gcc will understand it; otherwise hope gcc
// understands the suffix correctly. The main use case this would go
// wrong in is for linker inputs if they happened to have an odd
// suffix; really the only way to get this to happen is a command
// like '-x foobar a.c' which will treat a.c like a linker input.
//
// FIXME: For the linker case specifically, can we safely convert
// inputs into '-Wl,' options?
for (const auto &II : Inputs) {
// Don't try to pass LLVM or AST inputs to a generic gcc.
if (types::isLLVMIR(II.getType()))
D.Diag(clang::diag::err_drv_no_linker_llvm_support)
<< getToolChain().getTripleString();
else if (II.getType() == types::TY_AST)
D.Diag(diag::err_drv_no_ast_support) << getToolChain().getTripleString();
else if (II.getType() == types::TY_ModuleFile)
D.Diag(diag::err_drv_no_module_support)
<< getToolChain().getTripleString();
if (types::canTypeBeUserSpecified(II.getType())) {
CmdArgs.push_back("-x");
CmdArgs.push_back(types::getTypeName(II.getType()));
}
if (II.isFilename())
CmdArgs.push_back(II.getFilename());
else {
const Arg &A = II.getInputArg();
// Reverse translate some rewritten options.
if (A.getOption().matches(options::OPT_Z_reserved_lib_stdcxx)) {
CmdArgs.push_back("-lstdc++");
continue;
}
// Don't render as input, we need gcc to do the translations.
A.render(Args, CmdArgs);
}
}
const std::string &customGCCName = D.getCCCGenericGCCName();
const char *GCCName;
if (!customGCCName.empty())
GCCName = customGCCName.c_str();
else if (D.CCCIsCXX()) {
GCCName = "g++";
} else
GCCName = "gcc";
const char *Exec = Args.MakeArgString(getToolChain().GetProgramPath(GCCName));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void tools::gcc::Preprocessor::RenderExtraToolArgs(
const JobAction &JA, ArgStringList &CmdArgs) const {
CmdArgs.push_back("-E");
}
void tools::gcc::Compiler::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
const Driver &D = getToolChain().getDriver();
switch (JA.getType()) {
// If -flto, etc. are present then make sure not to force assembly output.
case types::TY_LLVM_IR:
case types::TY_LTO_IR:
case types::TY_LLVM_BC:
case types::TY_LTO_BC:
CmdArgs.push_back("-c");
break;
// We assume we've got an "integrated" assembler in that gcc will produce an
// object file itself.
case types::TY_Object:
CmdArgs.push_back("-c");
break;
case types::TY_PP_Asm:
CmdArgs.push_back("-S");
break;
case types::TY_Nothing:
CmdArgs.push_back("-fsyntax-only");
break;
default:
D.Diag(diag::err_drv_invalid_gcc_output_type) << getTypeName(JA.getType());
}
}
void tools::gcc::Linker::RenderExtraToolArgs(const JobAction &JA,
ArgStringList &CmdArgs) const {
// The types are (hopefully) good enough.
}
// On Arm the endianness of the output file is determined by the target and
// can be overridden by the pseudo-target flags '-mlittle-endian'/'-EL' and
// '-mbig-endian'/'-EB'. Unlike other targets the flag does not result in a
// normalized triple so we must handle the flag here.
static bool isArmBigEndian(const llvm::Triple &Triple,
const ArgList &Args) {
bool IsBigEndian = false;
switch (Triple.getArch()) {
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
IsBigEndian = true;
LLVM_FALLTHROUGH;
case llvm::Triple::arm:
case llvm::Triple::thumb:
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian))
IsBigEndian = !A->getOption().matches(options::OPT_mlittle_endian);
break;
default:
break;
}
return IsBigEndian;
}
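// Return the GNU ld emulation name (the value passed to the linker via -m)
// for the given target, or nullptr if the target is not handled here.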
static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) {
switch (T.getArch()) {
case llvm::Triple::x86:
if (T.isOSIAMCU())
return "elf_iamcu";
return "elf_i386";
case llvm::Triple::aarch64:
return "aarch64linux";
case llvm::Triple::aarch64_be:
return "aarch64linuxb";
case llvm::Triple::arm:
case llvm::Triple::thumb:
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
return isArmBigEndian(T, Args) ? "armelfb_linux_eabi" : "armelf_linux_eabi";
case llvm::Triple::m68k:
return "m68kelf";
case llvm::Triple::ppc:
if (T.isOSLinux())
return "elf32ppclinux";
return "elf32ppc";
case llvm::Triple::ppcle:
if (T.isOSLinux())
return "elf32lppclinux";
return "elf32lppc";
case llvm::Triple::ppc64:
return "elf64ppc";
case llvm::Triple::ppc64le:
return "elf64lppc";
case llvm::Triple::riscv32:
return "elf32lriscv";
case llvm::Triple::riscv64:
return "elf64lriscv";
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
return "elf32_sparc";
case llvm::Triple::sparcv9:
return "elf64_sparc";
case llvm::Triple::mips:
return "elf32btsmip";
case llvm::Triple::mipsel:
return "elf32ltsmip";
case llvm::Triple::mips64:
if (tools::mips::hasMipsAbiArg(Args, "n32") ||
T.getEnvironment() == llvm::Triple::GNUABIN32)
return "elf32btsmipn32";
return "elf64btsmip";
case llvm::Triple::mips64el:
if (tools::mips::hasMipsAbiArg(Args, "n32") ||
T.getEnvironment() == llvm::Triple::GNUABIN32)
return "elf32ltsmipn32";
return "elf64ltsmip";
case llvm::Triple::systemz:
return "elf64_s390";
case llvm::Triple::x86_64:
if (T.isX32())
return "elf32_x86_64";
return "elf_x86_64";
case llvm::Triple::ve:
return "elf64ve";
case llvm::Triple::csky:
return "cskyelf_linux";
default:
return nullptr;
}
}
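// Decide whether the link should produce a position-independent executable.
// Explicit -shared, -static, -r, or -static-pie disables PIE; otherwise the
// last of -pie/-no-pie/-nopie wins, falling back to the toolchain default.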
static bool getPIE(const ArgList &Args, const ToolChain &TC) {
if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) ||
Args.hasArg(options::OPT_r) || Args.hasArg(options::OPT_static_pie))
return false;
Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie,
options::OPT_nopie);
if (!A)
return TC.isPIEDefault(Args);
return A->getOption().matches(options::OPT_pie);
}
static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) {
bool HasStaticPIE = Args.hasArg(options::OPT_static_pie);
// -no-pie is an alias for -nopie. So, handling -nopie takes care of
// -no-pie as well.
if (HasStaticPIE && Args.hasArg(options::OPT_nopie)) {
const Driver &D = TC.getDriver();
const llvm::opt::OptTable &Opts = D.getOpts();
const char *StaticPIEName = Opts.getOptionName(options::OPT_static_pie);
const char *NoPIEName = Opts.getOptionName(options::OPT_nopie);
D.Diag(diag::err_drv_cannot_mix_options) << StaticPIEName << NoPIEName;
}
return HasStaticPIE;
}
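// A fully static (non-PIE) link is requested only when -static is given
// without -static-pie.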
static bool getStatic(const ArgList &Args) {
return Args.hasArg(options::OPT_static) &&
!Args.hasArg(options::OPT_static_pie);
}
void tools::gnutools::StaticLibTool::ConstructJob(
Compilation &C, const JobAction &JA, const InputInfo &Output,
const InputInfoList &Inputs, const ArgList &Args,
const char *LinkingOutput) const {
const Driver &D = getToolChain().getDriver();
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
// Silence warnings when linking C code with a C++ '-stdlib' argument.
Args.ClaimAllArgs(options::OPT_stdlib_EQ);
// The ar tool command line is "llvm-ar <options> <output_file> <input_files>".
ArgStringList CmdArgs;
// Create and insert file members with a deterministic index.
CmdArgs.push_back("rcsD");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs) {
if (II.isFilename()) {
CmdArgs.push_back(II.getFilename());
}
}
// Delete the old output archive file, if it already exists, before generating
// a new archive file.
auto OutputFileName = Output.getFilename();
if (Output.isFilename() && llvm::sys::fs::exists(OutputFileName)) {
if (std::error_code EC = llvm::sys::fs::remove(OutputFileName)) {
D.Diag(diag::err_drv_unable_to_remove_file) << EC.message();
return;
}
}
const char *Exec = Args.MakeArgString(getToolChain().GetStaticLibToolPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
// FIXME: The Linker class constructor takes a ToolChain and not a
// Generic_ELF, so the static_cast might return a reference to an invalid
// instance (see PR45061). Ideally, the Linker constructor needs to take a
// Generic_ELF instead.
const toolchains::Generic_ELF &ToolChain =
static_cast<const toolchains::Generic_ELF &>(getToolChain());
const Driver &D = ToolChain.getDriver();
const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
const llvm::Triple::ArchType Arch = ToolChain.getArch();
const bool isAndroid = ToolChain.getTriple().isAndroid();
const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
const bool IsVE = ToolChain.getTriple().isVE();
const bool IsPIE = getPIE(Args, ToolChain);
const bool IsStaticPIE = getStaticPIE(Args, ToolChain);
const bool IsStatic = getStatic(Args);
const bool HasCRTBeginEndFiles =
ToolChain.getTriple().hasEnvironment() ||
(ToolChain.getTriple().getVendor() != llvm::Triple::MipsTechnologies);
ArgStringList CmdArgs;
// Silence warning for "clang -g foo.o -o foo"
Args.ClaimAllArgs(options::OPT_g_Group);
// and "clang -emit-llvm foo.o -o foo"
Args.ClaimAllArgs(options::OPT_emit_llvm);
// and for "clang -w foo.o -o foo". Other warning options are already
// handled somewhere else.
Args.ClaimAllArgs(options::OPT_w);
if (!D.SysRoot.empty())
CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot));
if (IsPIE)
CmdArgs.push_back("-pie");
if (IsStaticPIE) {
CmdArgs.push_back("-static");
CmdArgs.push_back("-pie");
CmdArgs.push_back("--no-dynamic-linker");
CmdArgs.push_back("-z");
CmdArgs.push_back("text");
}
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (Args.hasArg(options::OPT_s))
CmdArgs.push_back("-s");
if (Triple.isARM() || Triple.isThumb() || Triple.isAArch64()) {
bool IsBigEndian = isArmBigEndian(Triple, Args);
if (IsBigEndian)
arm::appendBE8LinkFlag(Args, CmdArgs, Triple);
IsBigEndian = IsBigEndian || Arch == llvm::Triple::aarch64_be;
CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL");
}
// Most Android ARM64 targets should enable the linker fix for erratum
// 843419. Only non-Cortex-A53 devices are allowed to skip this flag.
if (Arch == llvm::Triple::aarch64 && isAndroid) {
std::string CPU = getCPUName(D, Args, Triple);
if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53")
CmdArgs.push_back("--fix-cortex-a53-843419");
}
ToolChain.addExtraOpts(CmdArgs);
CmdArgs.push_back("--eh-frame-hdr");
if (const char *LDMOption = getLDMOption(ToolChain.getTriple(), Args)) {
CmdArgs.push_back("-m");
CmdArgs.push_back(LDMOption);
} else {
D.Diag(diag::err_target_unknown_triple) << Triple.str();
return;
}
if (Triple.isRISCV())
CmdArgs.push_back("-X");
if (Args.hasArg(options::OPT_shared))
CmdArgs.push_back("-shared");
if (IsStatic) {
CmdArgs.push_back("-static");
} else {
if (Args.hasArg(options::OPT_rdynamic))
CmdArgs.push_back("-export-dynamic");
if (!Args.hasArg(options::OPT_shared) && !IsStaticPIE &&
!Args.hasArg(options::OPT_r)) {
CmdArgs.push_back("-dynamic-linker");
CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) +
ToolChain.getDynamicLinker(Args)));
}
}
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles,
options::OPT_r)) {
if (!isAndroid && !IsIAMCU) {
const char *crt1 = nullptr;
if (!Args.hasArg(options::OPT_shared)) {
if (Args.hasArg(options::OPT_pg))
crt1 = "gcrt1.o";
else if (IsPIE)
crt1 = "Scrt1.o";
else if (IsStaticPIE)
crt1 = "rcrt1.o";
else
crt1 = "crt1.o";
}
if (crt1)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crt1)));
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crti.o")));
}
if (IsVE) {
CmdArgs.push_back("-z");
CmdArgs.push_back("max-page-size=0x4000000");
}
if (IsIAMCU)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crt0.o")));
else if (HasCRTBeginEndFiles) {
std::string P;
if (ToolChain.GetRuntimeLibType(Args) == ToolChain::RLT_CompilerRT &&
!isAndroid) {
std::string crtbegin = ToolChain.getCompilerRT(Args, "crtbegin",
ToolChain::FT_Object);
if (ToolChain.getVFS().exists(crtbegin))
P = crtbegin;
}
if (P.empty()) {
const char *crtbegin;
if (Args.hasArg(options::OPT_shared))
crtbegin = isAndroid ? "crtbegin_so.o" : "crtbeginS.o";
else if (IsStatic)
crtbegin = isAndroid ? "crtbegin_static.o" : "crtbeginT.o";
else if (IsPIE || IsStaticPIE)
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbeginS.o";
else
crtbegin = isAndroid ? "crtbegin_dynamic.o" : "crtbegin.o";
P = ToolChain.GetFilePath(crtbegin);
}
CmdArgs.push_back(Args.MakeArgString(P));
}
// Add crtfastmath.o if available and fast math is enabled.
ToolChain.addFastMathRuntimeIfAvailable(Args, CmdArgs);
}
Args.AddAllArgs(CmdArgs, options::OPT_L);
Args.AddAllArgs(CmdArgs, options::OPT_u);
ToolChain.AddFilePathLibArgs(Args, CmdArgs);
if (D.isUsingLTO()) {
assert(!Inputs.empty() && "Must have at least one input.");
addLTOOptions(ToolChain, Args, CmdArgs, Output, Inputs[0],
D.getLTOMode() == LTOK_Thin);
}
if (Args.hasArg(options::OPT_Z_Xlinker__no_demangle))
CmdArgs.push_back("--no-demangle");
bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs);
bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs);
addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs);
AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
addHIPRuntimeLibArgs(ToolChain, Args, CmdArgs);
// The profile runtime also needs access to system libraries.
getToolChain().addProfileRTLibs(Args, CmdArgs);
if (D.CCCIsCXX() &&
!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs,
options::OPT_r)) {
if (ToolChain.ShouldLinkCXXStdlib(Args)) {
bool OnlyLibstdcxxStatic = Args.hasArg(options::OPT_static_libstdcxx) &&
!Args.hasArg(options::OPT_static);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bstatic");
ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs);
if (OnlyLibstdcxxStatic)
CmdArgs.push_back("-Bdynamic");
}
CmdArgs.push_back("-lm");
}
// If we are linking for the device, all symbols should be bound locally. The
// symbols are already protected, which makes this redundant; it is only
// necessary to work around a problem in bfd.
// TODO: Remove this once 'lld' becomes the only linker for offloading.
if (JA.isDeviceOffloading(Action::OFK_OpenMP))
CmdArgs.push_back("-Bsymbolic");
// Silence warnings when linking C code with a C++ '-stdlib' argument.
Args.ClaimAllArgs(options::OPT_stdlib_EQ);
// Additional linker setup and flags for Fortran. This is required in order
// to generate executables. As the Fortran runtime depends on the C runtime,
// these dependencies need to be listed before the C runtime below (i.e.
// AddRunTimeLibs).
if (D.IsFlangMode()) {
addFortranRuntimeLibraryPath(ToolChain, Args, CmdArgs);
addFortranRuntimeLibs(ToolChain, CmdArgs);
CmdArgs.push_back("-lm");
}
if (!Args.hasArg(options::OPT_nostdlib, options::OPT_r)) {
if (!Args.hasArg(options::OPT_nodefaultlibs)) {
if (IsStatic || IsStaticPIE)
CmdArgs.push_back("--start-group");
if (NeedsSanitizerDeps)
linkSanitizerRuntimeDeps(ToolChain, CmdArgs);
if (NeedsXRayDeps)
linkXRayRuntimeDeps(ToolChain, CmdArgs);
bool WantPthread = Args.hasArg(options::OPT_pthread) ||
Args.hasArg(options::OPT_pthreads);
// Use the static OpenMP runtime with -static-openmp
bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) &&
!Args.hasArg(options::OPT_static);
// FIXME: Only pass GompNeedsRT = true for platforms with libgomp that
// require librt. Most modern Linux platforms do, but some may not.
if (addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP,
JA.isHostOffloading(Action::OFK_OpenMP),
/* GompNeedsRT= */ true))
// The OpenMP runtime implies pthreads when using the GNU toolchain.
// FIXME: Does this really make sense for all GNU toolchains?
WantPthread = true;
AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
// LLVM support for atomics on 32-bit SPARC V8+ is incomplete, so
// forcibly link with libatomic as a workaround.
// TODO: Issue #41880 and D118021.
if (getToolChain().getTriple().getArch() == llvm::Triple::sparc) {
CmdArgs.push_back("--push-state");
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-latomic");
CmdArgs.push_back("--pop-state");
}
if (WantPthread && !isAndroid)
CmdArgs.push_back("-lpthread");
if (Args.hasArg(options::OPT_fsplit_stack))
CmdArgs.push_back("--wrap=pthread_create");
if (!Args.hasArg(options::OPT_nolibc))
CmdArgs.push_back("-lc");
// Add IAMCU-specific libs, if needed.
if (IsIAMCU)
CmdArgs.push_back("-lgloss");
if (IsStatic || IsStaticPIE)
CmdArgs.push_back("--end-group");
else
AddRunTimeLibs(ToolChain, D, CmdArgs, Args);
// Add IAMCU-specific libs (outside the group), if needed.
if (IsIAMCU) {
CmdArgs.push_back("--as-needed");
CmdArgs.push_back("-lsoftfp");
CmdArgs.push_back("--no-as-needed");
}
}
if (!Args.hasArg(options::OPT_nostartfiles) && !IsIAMCU) {
if (HasCRTBeginEndFiles) {
std::string P;
if (ToolChain.GetRuntimeLibType(Args) == ToolChain::RLT_CompilerRT &&
!isAndroid) {
std::string crtend = ToolChain.getCompilerRT(Args, "crtend",
ToolChain::FT_Object);
if (ToolChain.getVFS().exists(crtend))
P = crtend;
}
if (P.empty()) {
const char *crtend;
if (Args.hasArg(options::OPT_shared))
crtend = isAndroid ? "crtend_so.o" : "crtendS.o";
else if (IsPIE || IsStaticPIE)
crtend = isAndroid ? "crtend_android.o" : "crtendS.o";
else
crtend = isAndroid ? "crtend_android.o" : "crtend.o";
P = ToolChain.GetFilePath(crtend);
}
CmdArgs.push_back(Args.MakeArgString(P));
}
if (!isAndroid)
CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath("crtn.o")));
}
}
Args.AddAllArgs(CmdArgs, options::OPT_T);
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
}
void tools::gnutools::Assembler::ConstructJob(Compilation &C,
const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
const ArgList &Args,
const char *LinkingOutput) const {
const auto &D = getToolChain().getDriver();
claimNoWarnArgs(Args);
ArgStringList CmdArgs;
llvm::Reloc::Model RelocationModel;
unsigned PICLevel;
bool IsPIE;
const char *DefaultAssembler = "as";
std::tie(RelocationModel, PICLevel, IsPIE) =
ParsePICArgs(getToolChain(), Args);
if (const Arg *A = Args.getLastArg(options::OPT_gz, options::OPT_gz_EQ)) {
if (A->getOption().getID() == options::OPT_gz) {
CmdArgs.push_back("--compress-debug-sections");
} else {
StringRef Value = A->getValue();
if (Value == "none" || Value == "zlib") {
CmdArgs.push_back(
Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
} else {
D.Diag(diag::err_drv_unsupported_option_argument)
<< A->getOption().getName() << Value;
}
}
}
switch (getToolChain().getArch()) {
default:
break;
// Add --32/--64 to make sure we get the format we want.
// This list is incomplete.
case llvm::Triple::x86:
CmdArgs.push_back("--32");
break;
case llvm::Triple::x86_64:
if (getToolChain().getTriple().isX32())
CmdArgs.push_back("--x32");
else
CmdArgs.push_back("--64");
break;
case llvm::Triple::ppc: {
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-mbig-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppcle: {
CmdArgs.push_back("-a32");
CmdArgs.push_back("-mppc");
CmdArgs.push_back("-mlittle-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppc64: {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-mbig-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::ppc64le: {
CmdArgs.push_back("-a64");
CmdArgs.push_back("-mppc64");
CmdArgs.push_back("-mlittle-endian");
CmdArgs.push_back(ppc::getPPCAsmModeForCPU(
getCPUName(D, Args, getToolChain().getTriple())));
break;
}
case llvm::Triple::riscv32:
case llvm::Triple::riscv64: {
StringRef ABIName = riscv::getRISCVABI(Args, getToolChain().getTriple());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(ABIName.data());
StringRef MArchName = riscv::getRISCVArch(Args, getToolChain().getTriple());
CmdArgs.push_back("-march");
CmdArgs.push_back(MArchName.data());
if (!Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true))
Args.addOptOutFlag(CmdArgs, options::OPT_mrelax, options::OPT_mno_relax);
break;
}
case llvm::Triple::sparc:
case llvm::Triple::sparcel: {
CmdArgs.push_back("-32");
std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::sparcv9: {
CmdArgs.push_back("-64");
std::string CPU = getCPUName(D, Args, getToolChain().getTriple());
CmdArgs.push_back(
sparc::getSparcAsmModeForCPU(CPU, getToolChain().getTriple()));
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb: {
const llvm::Triple &Triple2 = getToolChain().getTriple();
CmdArgs.push_back(isArmBigEndian(Triple2, Args) ? "-EB" : "-EL");
switch (Triple2.getSubArch()) {
case llvm::Triple::ARMSubArch_v7:
CmdArgs.push_back("-mfpu=neon");
break;
case llvm::Triple::ARMSubArch_v8:
CmdArgs.push_back("-mfpu=crypto-neon-fp-armv8");
break;
default:
break;
}
switch (arm::getARMFloatABI(getToolChain(), Args)) {
case arm::FloatABI::Invalid: llvm_unreachable("must have an ABI!");
case arm::FloatABI::Soft:
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=soft"));
break;
case arm::FloatABI::SoftFP:
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=softfp"));
break;
case arm::FloatABI::Hard:
CmdArgs.push_back(Args.MakeArgString("-mfloat-abi=hard"));
break;
}
Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
normalizeCPUNamesForAssembler(Args, CmdArgs);
Args.AddLastArg(CmdArgs, options::OPT_mfpu_EQ);
break;
}
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be: {
CmdArgs.push_back(
getToolChain().getArch() == llvm::Triple::aarch64_be ? "-EB" : "-EL");
Args.AddLastArg(CmdArgs, options::OPT_march_EQ);
normalizeCPUNamesForAssembler(Args, CmdArgs);
break;
}
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el: {
StringRef CPUName;
StringRef ABIName;
mips::getMipsCPUAndABI(Args, getToolChain().getTriple(), CPUName, ABIName);
ABIName = mips::getGnuCompatibleMipsABIName(ABIName);
CmdArgs.push_back("-march");
CmdArgs.push_back(CPUName.data());
CmdArgs.push_back("-mabi");
CmdArgs.push_back(ABIName.data());
// -mno-shared should be emitted unless -fpic, -fpie, -fPIC, -fPIE,
// or -mshared (not implemented) is in effect.
if (RelocationModel == llvm::Reloc::Static)
CmdArgs.push_back("-mno-shared");
// LLVM doesn't support -mplt yet and acts as if it is always given.
// However, -mplt has no effect with the N64 ABI.
if (ABIName != "64" && !Args.hasArg(options::OPT_mno_abicalls))
CmdArgs.push_back("-call_nonpic");
if (getToolChain().getTriple().isLittleEndian())
CmdArgs.push_back("-EL");
else
CmdArgs.push_back("-EB");
if (Arg *A = Args.getLastArg(options::OPT_mnan_EQ)) {
if (StringRef(A->getValue()) == "2008")
CmdArgs.push_back(Args.MakeArgString("-mnan=2008"));
}
// Add the last -mfp32/-mfpxx/-mfp64 or -mfpxx if it is enabled by default.
if (Arg *A = Args.getLastArg(options::OPT_mfp32, options::OPT_mfpxx,
options::OPT_mfp64)) {
A->claim();
A->render(Args, CmdArgs);
} else if (mips::shouldUseFPXX(
Args, getToolChain().getTriple(), CPUName, ABIName,
mips::getMipsFloatABI(getToolChain().getDriver(), Args,
getToolChain().getTriple())))
CmdArgs.push_back("-mfpxx");
// Pass on -mmips16 or -mno-mips16. However, the assembler equivalent of
// -mno-mips16 is actually -no-mips16.
if (Arg *A =
Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16)) {
if (A->getOption().matches(options::OPT_mips16)) {
A->claim();
A->render(Args, CmdArgs);
} else {
A->claim();
CmdArgs.push_back("-no-mips16");
}
}
Args.AddLastArg(CmdArgs, options::OPT_mmicromips,
options::OPT_mno_micromips);
Args.AddLastArg(CmdArgs, options::OPT_mdsp, options::OPT_mno_dsp);
Args.AddLastArg(CmdArgs, options::OPT_mdspr2, options::OPT_mno_dspr2);
if (Arg *A = Args.getLastArg(options::OPT_mmsa, options::OPT_mno_msa)) {
// Do not use AddLastArg because not all versions of the MIPS assembler
// support the -mmsa / -mno-msa options.
if (A->getOption().matches(options::OPT_mmsa))
CmdArgs.push_back(Args.MakeArgString("-mmsa"));
}
Args.AddLastArg(CmdArgs, options::OPT_mhard_float,
options::OPT_msoft_float);
Args.AddLastArg(CmdArgs, options::OPT_mdouble_float,
options::OPT_msingle_float);
Args.AddLastArg(CmdArgs, options::OPT_modd_spreg,
options::OPT_mno_odd_spreg);
AddAssemblerKPIC(getToolChain(), Args, CmdArgs);
break;
}
case llvm::Triple::systemz: {
// Always pass an -march option, since our default of z10 is later
// than the GNU assembler's default.
std::string CPUName = systemz::getSystemZTargetCPU(Args);
CmdArgs.push_back(Args.MakeArgString("-march=" + CPUName));
break;
}
case llvm::Triple::ve:
DefaultAssembler = "nas";
}
for (const Arg *A : Args.filtered(options::OPT_ffile_prefix_map_EQ,
options::OPT_fdebug_prefix_map_EQ)) {
StringRef Map = A->getValue();
if (!Map.contains('='))
D.Diag(diag::err_drv_invalid_argument_to_option)
<< Map << A->getOption().getName();
else {
CmdArgs.push_back(Args.MakeArgString("--debug-prefix-map"));
CmdArgs.push_back(Args.MakeArgString(Map));
}
A->claim();
}
Args.AddAllArgs(CmdArgs, options::OPT_I);
Args.AddAllArgValues(CmdArgs, options::OPT_Wa_COMMA, options::OPT_Xassembler);
CmdArgs.push_back("-o");
CmdArgs.push_back(Output.getFilename());
for (const auto &II : Inputs)
CmdArgs.push_back(II.getFilename());
const char *Exec =
Args.MakeArgString(getToolChain().GetProgramPath(DefaultAssembler));
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
// Handle the debug info splitting at object creation time if we're
// creating an object.
// TODO: Currently this only works on Linux with a newer objcopy.
if (Args.hasArg(options::OPT_gsplit_dwarf) &&
getToolChain().getTriple().isOSLinux())
SplitDebugInfo(getToolChain(), C, *this, JA, Args, Output,
SplitDebugName(JA, Args, Inputs[0], Output));
}
namespace {
// Filter to remove Multilibs that don't exist as a suffix to Path
class FilterNonExistent {
StringRef Base, File;
llvm::vfs::FileSystem &VFS;
public:
FilterNonExistent(StringRef Base, StringRef File, llvm::vfs::FileSystem &VFS)
: Base(Base), File(File), VFS(VFS) {}
bool operator()(const Multilib &M) {
return !VFS.exists(Base + M.gccSuffix() + File);
}
};
} // end anonymous namespace
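// The soft-float ABI is in effect when the last relevant flag is
// -msoft-float or -mfloat-abi=soft.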
static bool isSoftFloatABI(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_msoft_float, options::OPT_mhard_float,
options::OPT_mfloat_abi_EQ);
if (!A)
return false;
return A->getOption().matches(options::OPT_msoft_float) ||
(A->getOption().matches(options::OPT_mfloat_abi_EQ) &&
A->getValue() == StringRef("soft"));
}
static bool isArmOrThumbArch(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb;
}
static bool isMipsEL(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::mipsel || Arch == llvm::Triple::mips64el;
}
static bool isMips16(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mips16, options::OPT_mno_mips16);
return A && A->getOption().matches(options::OPT_mips16);
}
static bool isMicroMips(const ArgList &Args) {
Arg *A = Args.getLastArg(options::OPT_mmicromips, options::OPT_mno_micromips);
return A && A->getOption().matches(options::OPT_mmicromips);
}
static bool isMSP430(llvm::Triple::ArchType Arch) {
return Arch == llvm::Triple::msp430;
}
static Multilib makeMultilib(StringRef commonSuffix) {
return Multilib(commonSuffix, commonSuffix, commonSuffix);
}
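// Detect MIPS multilibs laid out in the Code Sourcery or Debian style under
// the GCC installation and pick the candidate set that best matches the
// directory tree and the command-line flags.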
static bool findMipsCsMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// Check for Code Sourcery toolchain multilibs
MultilibSet CSMipsMultilibs;
{
auto MArchMips16 = makeMultilib("/mips16").flag("+m32").flag("+mips16");
auto MArchMicroMips =
makeMultilib("/micromips").flag("+m32").flag("+mmicromips");
auto MArchDefault = makeMultilib("").flag("-mips16").flag("-mmicromips");
auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
auto SoftFloat = makeMultilib("/soft-float").flag("+msoft-float");
auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
auto DefaultFloat =
makeMultilib("").flag("-msoft-float").flag("-mnan=2008");
auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
// Note that this one's osSuffix is ""
auto MAbi64 = makeMultilib("")
.gccSuffix("/64")
.includeSuffix("/64")
.flag("+mabi=n64")
.flag("-mabi=n32")
.flag("-m32");
CSMipsMultilibs =
MultilibSet()
.Either(MArchMips16, MArchMicroMips, MArchDefault)
.Maybe(UCLibc)
.Either(SoftFloat, Nan2008, DefaultFloat)
.FilterOut("/micromips/nan2008")
.FilterOut("/mips16/nan2008")
.Either(BigEndian, LittleEndian)
.Maybe(MAbi64)
.FilterOut("/mips16.*/64")
.FilterOut("/micromips.*/64")
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
std::vector<std::string> Dirs({"/include"});
if (StringRef(M.includeSuffix()).startswith("/uclibc"))
Dirs.push_back(
"/../../../../mips-linux-gnu/libc/uclibc/usr/include");
else
Dirs.push_back("/../../../../mips-linux-gnu/libc/usr/include");
return Dirs;
});
}
MultilibSet DebianMipsMultilibs;
{
Multilib MAbiN32 =
Multilib().gccSuffix("/n32").includeSuffix("/n32").flag("+mabi=n32");
Multilib M64 = Multilib()
.gccSuffix("/64")
.includeSuffix("/64")
.flag("+m64")
.flag("-m32")
.flag("-mabi=n32");
Multilib M32 =
Multilib().gccSuffix("/32").flag("-m64").flag("+m32").flag("-mabi=n32");
DebianMipsMultilibs =
MultilibSet().Either(M32, M64, MAbiN32).FilterOut(NonExistent);
}
// Sort candidates. The toolchain that best matches the directory tree goes
// first. Then select the first toolchain that matches the command-line flags.
MultilibSet *Candidates[] = {&CSMipsMultilibs, &DebianMipsMultilibs};
if (CSMipsMultilibs.size() < DebianMipsMultilibs.size())
std::iter_swap(Candidates, Candidates + 1);
for (const MultilibSet *Candidate : Candidates) {
if (Candidate->select(Flags, Result.SelectedMultilib)) {
if (Candidate == &DebianMipsMultilibs)
Result.BiarchSibling = Multilib();
Result.Multilibs = *Candidate;
return true;
}
}
return false;
}
static bool findMipsAndroidMultilibs(llvm::vfs::FileSystem &VFS, StringRef Path,
const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
MultilibSet AndroidMipsMultilibs =
MultilibSet()
.Maybe(Multilib("/mips-r2").flag("+march=mips32r2"))
.Maybe(Multilib("/mips-r6").flag("+march=mips32r6"))
.FilterOut(NonExistent);
MultilibSet AndroidMipselMultilibs =
MultilibSet()
.Either(Multilib().flag("+march=mips32"),
Multilib("/mips-r2", "", "/mips-r2").flag("+march=mips32r2"),
Multilib("/mips-r6", "", "/mips-r6").flag("+march=mips32r6"))
.FilterOut(NonExistent);
MultilibSet AndroidMips64elMultilibs =
MultilibSet()
.Either(
Multilib().flag("+march=mips64r6"),
Multilib("/32/mips-r1", "", "/mips-r1").flag("+march=mips32"),
Multilib("/32/mips-r2", "", "/mips-r2").flag("+march=mips32r2"),
Multilib("/32/mips-r6", "", "/mips-r6").flag("+march=mips32r6"))
.FilterOut(NonExistent);
MultilibSet *MS = &AndroidMipsMultilibs;
if (VFS.exists(Path + "/mips-r6"))
MS = &AndroidMipselMultilibs;
else if (VFS.exists(Path + "/32"))
MS = &AndroidMips64elMultilibs;
if (MS->select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = *MS;
return true;
}
return false;
}
static bool findMipsMuslMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// Musl toolchain multilibs
MultilibSet MuslMipsMultilibs;
{
auto MArchMipsR2 = makeMultilib("")
.osSuffix("/mips-r2-hard-musl")
.flag("+EB")
.flag("-EL")
.flag("+march=mips32r2");
auto MArchMipselR2 = makeMultilib("/mipsel-r2-hard-musl")
.flag("-EB")
.flag("+EL")
.flag("+march=mips32r2");
MuslMipsMultilibs = MultilibSet().Either(MArchMipsR2, MArchMipselR2);
// Specify the callback that computes the include directories.
MuslMipsMultilibs.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/../sysroot" + M.osSuffix() + "/usr/include"});
});
}
if (MuslMipsMultilibs.select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = MuslMipsMultilibs;
return true;
}
return false;
}
static bool findMipsMtiMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// CodeScape MTI toolchain v1.2 and earlier.
MultilibSet MtiMipsMultilibsV1;
{
auto MArchMips32 = makeMultilib("/mips32")
.flag("+m32")
.flag("-m64")
.flag("-mmicromips")
.flag("+march=mips32");
auto MArchMicroMips = makeMultilib("/micromips")
.flag("+m32")
.flag("-m64")
.flag("+mmicromips");
auto MArchMips64r2 = makeMultilib("/mips64r2")
.flag("-m32")
.flag("+m64")
.flag("+march=mips64r2");
auto MArchMips64 = makeMultilib("/mips64").flag("-m32").flag("+m64").flag(
"-march=mips64r2");
auto MArchDefault = makeMultilib("")
.flag("+m32")
.flag("-m64")
.flag("-mmicromips")
.flag("+march=mips32r2");
auto Mips16 = makeMultilib("/mips16").flag("+mips16");
auto UCLibc = makeMultilib("/uclibc").flag("+muclibc");
auto MAbi64 =
makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
auto BigEndian = makeMultilib("").flag("+EB").flag("-EL");
auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
auto SoftFloat = makeMultilib("/sof").flag("+msoft-float");
auto Nan2008 = makeMultilib("/nan2008").flag("+mnan=2008");
MtiMipsMultilibsV1 =
MultilibSet()
.Either(MArchMips32, MArchMicroMips, MArchMips64r2, MArchMips64,
MArchDefault)
.Maybe(UCLibc)
.Maybe(Mips16)
.FilterOut("/mips64/mips16")
.FilterOut("/mips64r2/mips16")
.FilterOut("/micromips/mips16")
.Maybe(MAbi64)
.FilterOut("/micromips/64")
.FilterOut("/mips32/64")
.FilterOut("^/64")
.FilterOut("/mips16/64")
.Either(BigEndian, LittleEndian)
.Maybe(SoftFloat)
.Maybe(Nan2008)
.FilterOut(".*sof/nan2008")
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
std::vector<std::string> Dirs({"/include"});
if (StringRef(M.includeSuffix()).startswith("/uclibc"))
Dirs.push_back("/../../../../sysroot/uclibc/usr/include");
else
Dirs.push_back("/../../../../sysroot/usr/include");
return Dirs;
});
}
// CodeScape MTI toolchain starting from v1.3.
MultilibSet MtiMipsMultilibsV2;
{
auto BeHard = makeMultilib("/mips-r2-hard")
.flag("+EB")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("-muclibc");
auto BeSoft = makeMultilib("/mips-r2-soft")
.flag("+EB")
.flag("+msoft-float")
.flag("-mnan=2008");
auto ElHard = makeMultilib("/mipsel-r2-hard")
.flag("+EL")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("-muclibc");
auto ElSoft = makeMultilib("/mipsel-r2-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("-mnan=2008")
.flag("-mmicromips");
auto BeHardNan = makeMultilib("/mips-r2-hard-nan2008")
.flag("+EB")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("-muclibc");
auto ElHardNan = makeMultilib("/mipsel-r2-hard-nan2008")
.flag("+EL")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("-muclibc")
.flag("-mmicromips");
auto BeHardNanUclibc = makeMultilib("/mips-r2-hard-nan2008-uclibc")
.flag("+EB")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("+muclibc");
auto ElHardNanUclibc = makeMultilib("/mipsel-r2-hard-nan2008-uclibc")
.flag("+EL")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("+muclibc");
auto BeHardUclibc = makeMultilib("/mips-r2-hard-uclibc")
.flag("+EB")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("+muclibc");
auto ElHardUclibc = makeMultilib("/mipsel-r2-hard-uclibc")
.flag("+EL")
.flag("-msoft-float")
.flag("-mnan=2008")
.flag("+muclibc");
auto ElMicroHardNan = makeMultilib("/micromipsel-r2-hard-nan2008")
.flag("+EL")
.flag("-msoft-float")
.flag("+mnan=2008")
.flag("+mmicromips");
auto ElMicroSoft = makeMultilib("/micromipsel-r2-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("-mnan=2008")
.flag("+mmicromips");
auto O32 =
makeMultilib("/lib").osSuffix("").flag("-mabi=n32").flag("-mabi=n64");
auto N32 =
makeMultilib("/lib32").osSuffix("").flag("+mabi=n32").flag("-mabi=n64");
auto N64 =
makeMultilib("/lib64").osSuffix("").flag("-mabi=n32").flag("+mabi=n64");
MtiMipsMultilibsV2 =
MultilibSet()
.Either({BeHard, BeSoft, ElHard, ElSoft, BeHardNan, ElHardNan,
BeHardNanUclibc, ElHardNanUclibc, BeHardUclibc,
ElHardUclibc, ElMicroHardNan, ElMicroSoft})
.Either(O32, N32, N64)
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>({"/../../../../sysroot" +
M.includeSuffix() +
"/../usr/include"});
})
.setFilePathsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/../../../../mips-mti-linux-gnu/lib" + M.gccSuffix()});
});
}
for (auto Candidate : {&MtiMipsMultilibsV1, &MtiMipsMultilibsV2}) {
if (Candidate->select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = *Candidate;
return true;
}
}
return false;
}
static bool findMipsImgMultilibs(const Multilib::flags_list &Flags,
FilterNonExistent &NonExistent,
DetectedMultilibs &Result) {
// CodeScape IMG toolchain v1.2 and earlier.
MultilibSet ImgMultilibsV1;
{
auto Mips64r6 = makeMultilib("/mips64r6").flag("+m64").flag("-m32");
auto LittleEndian = makeMultilib("/el").flag("+EL").flag("-EB");
auto MAbi64 =
makeMultilib("/64").flag("+mabi=n64").flag("-mabi=n32").flag("-m32");
ImgMultilibsV1 =
MultilibSet()
.Maybe(Mips64r6)
.Maybe(MAbi64)
.Maybe(LittleEndian)
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/include", "/../../../../sysroot/usr/include"});
});
}
// CodeScape IMG toolchain starting from v1.3.
MultilibSet ImgMultilibsV2;
{
auto BeHard = makeMultilib("/mips-r6-hard")
.flag("+EB")
.flag("-msoft-float")
.flag("-mmicromips");
auto BeSoft = makeMultilib("/mips-r6-soft")
.flag("+EB")
.flag("+msoft-float")
.flag("-mmicromips");
auto ElHard = makeMultilib("/mipsel-r6-hard")
.flag("+EL")
.flag("-msoft-float")
.flag("-mmicromips");
auto ElSoft = makeMultilib("/mipsel-r6-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("-mmicromips");
auto BeMicroHard = makeMultilib("/micromips-r6-hard")
.flag("+EB")
.flag("-msoft-float")
.flag("+mmicromips");
auto BeMicroSoft = makeMultilib("/micromips-r6-soft")
.flag("+EB")
.flag("+msoft-float")
.flag("+mmicromips");
auto ElMicroHard = makeMultilib("/micromipsel-r6-hard")
.flag("+EL")
.flag("-msoft-float")
.flag("+mmicromips");
auto ElMicroSoft = makeMultilib("/micromipsel-r6-soft")
.flag("+EL")
.flag("+msoft-float")
.flag("+mmicromips");
auto O32 =
makeMultilib("/lib").osSuffix("").flag("-mabi=n32").flag("-mabi=n64");
auto N32 =
makeMultilib("/lib32").osSuffix("").flag("+mabi=n32").flag("-mabi=n64");
auto N64 =
makeMultilib("/lib64").osSuffix("").flag("-mabi=n32").flag("+mabi=n64");
ImgMultilibsV2 =
MultilibSet()
.Either({BeHard, BeSoft, ElHard, ElSoft, BeMicroHard, BeMicroSoft,
ElMicroHard, ElMicroSoft})
.Either(O32, N32, N64)
.FilterOut(NonExistent)
.setIncludeDirsCallback([](const Multilib &M) {
return std::vector<std::string>({"/../../../../sysroot" +
M.includeSuffix() +
"/../usr/include"});
})
.setFilePathsCallback([](const Multilib &M) {
return std::vector<std::string>(
{"/../../../../mips-img-linux-gnu/lib" + M.gccSuffix()});
});
}
for (auto Candidate : {&ImgMultilibsV1, &ImgMultilibsV2}) {
if (Candidate->select(Flags, Result.SelectedMultilib)) {
Result.Multilibs = *Candidate;
return true;
}
}
return false;
}
bool clang::driver::findMIPSMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
StringRef CPUName;
StringRef ABIName;
tools::mips::getMipsCPUAndABI(Args, TargetTriple, CPUName, ABIName);
llvm::Triple::ArchType TargetArch = TargetTriple.getArch();
Multilib::flags_list Flags;
addMultilibFlag(TargetTriple.isMIPS32(), "m32", Flags);
addMultilibFlag(TargetTriple.isMIPS64(), "m64", Flags);
addMultilibFlag(isMips16(Args), "mips16", Flags);
addMultilibFlag(CPUName == "mips32", "march=mips32", Flags);
addMultilibFlag(CPUName == "mips32r2" || CPUName == "mips32r3" ||
CPUName == "mips32r5" || CPUName == "p5600",
"march=mips32r2", Flags);
addMultilibFlag(CPUName == "mips32r6", "march=mips32r6", Flags);
addMultilibFlag(CPUName == "mips64", "march=mips64", Flags);
addMultilibFlag(CPUName == "mips64r2" || CPUName == "mips64r3" ||
CPUName == "mips64r5" || CPUName == "octeon" ||
CPUName == "octeon+",
"march=mips64r2", Flags);
addMultilibFlag(CPUName == "mips64r6", "march=mips64r6", Flags);
addMultilibFlag(isMicroMips(Args), "mmicromips", Flags);
addMultilibFlag(tools::mips::isUCLibc(Args), "muclibc", Flags);
addMultilibFlag(tools::mips::isNaN2008(D, Args, TargetTriple), "mnan=2008",
Flags);
addMultilibFlag(ABIName == "n32", "mabi=n32", Flags);
addMultilibFlag(ABIName == "n64", "mabi=n64", Flags);
addMultilibFlag(isSoftFloatABI(Args), "msoft-float", Flags);
addMultilibFlag(!isSoftFloatABI(Args), "mhard-float", Flags);
addMultilibFlag(isMipsEL(TargetArch), "EL", Flags);
addMultilibFlag(!isMipsEL(TargetArch), "EB", Flags);
if (TargetTriple.isAndroid())
return findMipsAndroidMultilibs(D.getVFS(), Path, Flags, NonExistent,
Result);
if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
TargetTriple.getEnvironment() == llvm::Triple::UnknownEnvironment)
return findMipsMuslMultilibs(Flags, NonExistent, Result);
if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
TargetTriple.isGNUEnvironment())
return findMipsMtiMultilibs(Flags, NonExistent, Result);
if (TargetTriple.getVendor() == llvm::Triple::ImaginationTechnologies &&
TargetTriple.getOS() == llvm::Triple::Linux &&
TargetTriple.isGNUEnvironment())
return findMipsImgMultilibs(Flags, NonExistent, Result);
if (findMipsCsMultilibs(Flags, NonExistent, Result))
return true;
// Fall back to the regular toolchain-tree structure.
Multilib Default;
Result.Multilibs.push_back(Default);
Result.Multilibs.FilterOut(NonExistent);
if (Result.Multilibs.select(Flags, Result.SelectedMultilib)) {
Result.BiarchSibling = Multilib();
return true;
}
return false;
}
static void findAndroidArmMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
// Find multilibs with subdirectories like armv7-a, thumb, armv7-a/thumb.
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
Multilib ArmV7Multilib = makeMultilib("/armv7-a")
.flag("+march=armv7-a")
.flag("-mthumb");
Multilib ThumbMultilib = makeMultilib("/thumb")
.flag("-march=armv7-a")
.flag("+mthumb");
Multilib ArmV7ThumbMultilib = makeMultilib("/armv7-a/thumb")
.flag("+march=armv7-a")
.flag("+mthumb");
Multilib DefaultMultilib = makeMultilib("")
.flag("-march=armv7-a")
.flag("-mthumb");
MultilibSet AndroidArmMultilibs =
MultilibSet()
.Either(ThumbMultilib, ArmV7Multilib,
ArmV7ThumbMultilib, DefaultMultilib)
.FilterOut(NonExistent);
Multilib::flags_list Flags;
llvm::StringRef Arch = Args.getLastArgValue(options::OPT_march_EQ);
bool IsArmArch = TargetTriple.getArch() == llvm::Triple::arm;
bool IsThumbArch = TargetTriple.getArch() == llvm::Triple::thumb;
bool IsV7SubArch = TargetTriple.getSubArch() == llvm::Triple::ARMSubArch_v7;
bool IsThumbMode = IsThumbArch ||
Args.hasFlag(options::OPT_mthumb, options::OPT_mno_thumb, false) ||
(IsArmArch && llvm::ARM::parseArchISA(Arch) == llvm::ARM::ISAKind::THUMB);
bool IsArmV7Mode = (IsArmArch || IsThumbArch) &&
(llvm::ARM::parseArchVersion(Arch) == 7 ||
(IsArmArch && Arch == "" && IsV7SubArch));
addMultilibFlag(IsArmV7Mode, "march=armv7-a", Flags);
addMultilibFlag(IsThumbMode, "mthumb", Flags);
if (AndroidArmMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = AndroidArmMultilibs;
}
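// MSP430 multilibs currently differ only in exception support
// (/430 vs /430/exceptions).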
static bool findMSP430Multilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
Multilib WithoutExceptions = makeMultilib("/430").flag("-exceptions");
Multilib WithExceptions = makeMultilib("/430/exceptions").flag("+exceptions");
// FIXME: When clang starts to support the msp430x ISA, additional logic
// to select between multilibs must be implemented.
// Multilib MSP430xMultilib = makeMultilib("/large");
Result.Multilibs.push_back(WithoutExceptions);
Result.Multilibs.push_back(WithExceptions);
Result.Multilibs.FilterOut(NonExistent);
Multilib::flags_list Flags;
addMultilibFlag(Args.hasFlag(options::OPT_fexceptions,
options::OPT_fno_exceptions, false),
"exceptions", Flags);
if (Result.Multilibs.select(Flags, Result.SelectedMultilib))
return true;
return false;
}
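// Detect CSKY multilibs, keyed on the selected float ABI, the CPU
// architecture name, and big-endian mode.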
static void findCSKYMultilibs(const Driver &D, const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
tools::csky::FloatABI TheFloatABI = tools::csky::getCSKYFloatABI(D, Args);
llvm::Optional<llvm::StringRef> Res = tools::csky::getCSKYArchName(D, Args, TargetTriple);
if (!Res)
return;
auto ARCHName = *Res;
Multilib::flags_list Flags;
addMultilibFlag(TheFloatABI == tools::csky::FloatABI::Hard, "hard-fp", Flags);
addMultilibFlag(TheFloatABI == tools::csky::FloatABI::SoftFP, "soft-fp",
Flags);
addMultilibFlag(TheFloatABI == tools::csky::FloatABI::Soft, "soft", Flags);
addMultilibFlag(ARCHName == "ck801", "march=ck801", Flags);
addMultilibFlag(ARCHName == "ck802", "march=ck802", Flags);
addMultilibFlag(ARCHName == "ck803", "march=ck803", Flags);
addMultilibFlag(ARCHName == "ck804", "march=ck804", Flags);
addMultilibFlag(ARCHName == "ck805", "march=ck805", Flags);
addMultilibFlag(ARCHName == "ck807", "march=ck807", Flags);
addMultilibFlag(ARCHName == "ck810", "march=ck810", Flags);
addMultilibFlag(ARCHName == "ck810v", "march=ck810v", Flags);
addMultilibFlag(ARCHName == "ck860", "march=ck860", Flags);
addMultilibFlag(ARCHName == "ck860v", "march=ck860v", Flags);
bool isBigEndian = false;
if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian,
options::OPT_mbig_endian))
isBigEndian = !A->getOption().matches(options::OPT_mlittle_endian);
addMultilibFlag(isBigEndian, "EB", Flags);
auto HardFloat = makeMultilib("/hard-fp").flag("+hard-fp");
auto SoftFpFloat = makeMultilib("/soft-fp").flag("+soft-fp");
auto SoftFloat = makeMultilib("").flag("+soft");
auto Arch801 = makeMultilib("/ck801").flag("+march=ck801");
auto Arch802 = makeMultilib("/ck802").flag("+march=ck802");
auto Arch803 = makeMultilib("/ck803").flag("+march=ck803");
// CK804 uses the same libraries as CK803.
auto Arch804 = makeMultilib("/ck803").flag("+march=ck804");
auto Arch805 = makeMultilib("/ck805").flag("+march=ck805");
auto Arch807 = makeMultilib("/ck807").flag("+march=ck807");
auto Arch810 = makeMultilib("").flag("+march=ck810");
auto Arch810v = makeMultilib("/ck810v").flag("+march=ck810v");
auto Arch860 = makeMultilib("/ck860").flag("+march=ck860");
auto Arch860v = makeMultilib("/ck860v").flag("+march=ck860v");
auto BigEndian = makeMultilib("/big").flag("+EB");
MultilibSet CSKYMultilibs =
MultilibSet()
.Maybe(BigEndian)
.Either({Arch801, Arch802, Arch803, Arch804, Arch805, Arch807,
Arch810, Arch810v, Arch860, Arch860v})
.Either(HardFloat, SoftFpFloat, SoftFloat)
.FilterOut(NonExistent);
if (CSKYMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = CSKYMultilibs;
}
static void findRISCVBareMetalMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
DetectedMultilibs &Result) {
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
struct RiscvMultilib {
StringRef march;
StringRef mabi;
};
// Currently, only the set of multilibs that riscv-gnu-toolchain provides is supported.
// TODO: support MULTILIB_REUSE
constexpr RiscvMultilib RISCVMultilibSet[] = {
{"rv32i", "ilp32"}, {"rv32im", "ilp32"}, {"rv32iac", "ilp32"},
{"rv32imac", "ilp32"}, {"rv32imafc", "ilp32f"}, {"rv64imac", "lp64"},
{"rv64imafdc", "lp64d"}};
std::vector<Multilib> Ms;
for (auto Element : RISCVMultilibSet) {
// multilib path rule is ${march}/${mabi}
Ms.emplace_back(
makeMultilib((Twine(Element.march) + "/" + Twine(Element.mabi)).str())
.flag(Twine("+march=", Element.march).str())
.flag(Twine("+mabi=", Element.mabi).str()));
}
MultilibSet RISCVMultilibs =
MultilibSet()
.Either(ArrayRef<Multilib>(Ms))
.FilterOut(NonExistent)
.setFilePathsCallback([](const Multilib &M) {
return std::vector<std::string>(
{M.gccSuffix(),
"/../../../../riscv64-unknown-elf/lib" + M.gccSuffix(),
"/../../../../riscv32-unknown-elf/lib" + M.gccSuffix()});
});
Multilib::flags_list Flags;
llvm::StringSet<> Added_ABIs;
StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple);
StringRef MArch = tools::riscv::getRISCVArch(Args, TargetTriple);
for (auto Element : RISCVMultilibSet) {
addMultilibFlag(MArch == Element.march,
Twine("march=", Element.march).str().c_str(), Flags);
if (!Added_ABIs.count(Element.mabi)) {
Added_ABIs.insert(Element.mabi);
addMultilibFlag(ABIName == Element.mabi,
Twine("mabi=", Element.mabi).str().c_str(), Flags);
}
}
if (RISCVMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = RISCVMultilibs;
}
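// Detect RISC-V Linux-style multilibs (lib32/lib64 per ABI). Bare-metal
// (unknown-OS) targets are handled by findRISCVBareMetalMultilibs above.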
static void findRISCVMultilibs(const Driver &D,
const llvm::Triple &TargetTriple, StringRef Path,
const ArgList &Args, DetectedMultilibs &Result) {
if (TargetTriple.getOS() == llvm::Triple::UnknownOS)
return findRISCVBareMetalMultilibs(D, TargetTriple, Path, Args, Result);
FilterNonExistent NonExistent(Path, "/crtbegin.o", D.getVFS());
Multilib Ilp32 = makeMultilib("lib32/ilp32").flag("+m32").flag("+mabi=ilp32");
Multilib Ilp32f =
makeMultilib("lib32/ilp32f").flag("+m32").flag("+mabi=ilp32f");
Multilib Ilp32d =
makeMultilib("lib32/ilp32d").flag("+m32").flag("+mabi=ilp32d");
Multilib Lp64 = makeMultilib("lib64/lp64").flag("+m64").flag("+mabi=lp64");
Multilib Lp64f = makeMultilib("lib64/lp64f").flag("+m64").flag("+mabi=lp64f");
Multilib Lp64d = makeMultilib("lib64/lp64d").flag("+m64").flag("+mabi=lp64d");
MultilibSet RISCVMultilibs =
MultilibSet()
.Either({Ilp32, Ilp32f, Ilp32d, Lp64, Lp64f, Lp64d})
.FilterOut(NonExistent);
Multilib::flags_list Flags;
bool IsRV64 = TargetTriple.getArch() == llvm::Triple::riscv64;
StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple);
addMultilibFlag(!IsRV64, "m32", Flags);
addMultilibFlag(IsRV64, "m64", Flags);
addMultilibFlag(ABIName == "ilp32", "mabi=ilp32", Flags);
addMultilibFlag(ABIName == "ilp32f", "mabi=ilp32f", Flags);
addMultilibFlag(ABIName == "ilp32d", "mabi=ilp32d", Flags);
addMultilibFlag(ABIName == "lp64", "mabi=lp64", Flags);
addMultilibFlag(ABIName == "lp64f", "mabi=lp64f", Flags);
addMultilibFlag(ABIName == "lp64d", "mabi=lp64d", Flags);
if (RISCVMultilibs.select(Flags, Result.SelectedMultilib))
Result.Multilibs = RISCVMultilibs;
}
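// Detect the 32-bit/64-bit/x32 biarch layout of a GCC installation and
// select the variant matching the target, recording a biarch sibling when an
// alternate suffix is chosen.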
static bool findBiarchMultilibs(const Driver &D,
const llvm::Triple &TargetTriple,
StringRef Path, const ArgList &Args,
bool NeedsBiarchSuffix,
DetectedMultilibs &Result) {
Multilib Default;
// Some versions of SUSE and Fedora on ppc64 put 32-bit libs
// in what would normally be GCCInstallPath and put the 64-bit
// libs in a subdirectory named 64. The simple logic we follow is that
// *if* there is a subdirectory of the right name with crtbegin.o in it,
// we use that. If not, and if not a biarch triple alias, we look for
// crtbegin.o without the subdirectory.
StringRef Suff64 = "/64";
// Solaris uses platform-specific suffixes instead of /64.
if (TargetTriple.getOS() == llvm::Triple::Solaris) {
switch (TargetTriple.getArch()) {
case llvm::Triple::x86:
case llvm::Triple::x86_64:
Suff64 = "/amd64";
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcv9:
Suff64 = "/sparcv9";
break;
default:
break;
}
}
Multilib Alt64 = Multilib()
.gccSuffix(Suff64)
.includeSuffix(Suff64)
.flag("-m32")
.flag("+m64")
.flag("-mx32");
Multilib Alt32 = Multilib()
.gccSuffix("/32")
.includeSuffix("/32")
.flag("+m32")
.flag("-m64")
.flag("-mx32");
Multilib Altx32 = Multilib()
.gccSuffix("/x32")
.includeSuffix("/x32")
.flag("-m32")
.flag("-m64")
.flag("+mx32");
// GCC toolchain for IAMCU doesn't have crtbegin.o, so look for libgcc.a.
FilterNonExistent NonExistent(
Path, TargetTriple.isOSIAMCU() ? "/libgcc.a" : "/crtbegin.o", D.getVFS());
// Determine default multilib from: 32, 64, x32
// Also handle cases such as 64 on 32, 32 on 64, etc.
enum { UNKNOWN, WANT32, WANT64, WANTX32 } Want = UNKNOWN;
const bool IsX32 = TargetTriple.isX32();
if (TargetTriple.isArch32Bit() && !NonExistent(Alt32))
Want = WANT64;
else if (TargetTriple.isArch64Bit() && IsX32 && !NonExistent(Altx32))
Want = WANT64;
else if (TargetTriple.isArch64Bit() && !IsX32 && !NonExistent(Alt64))
Want = WANT32;
else {
if (TargetTriple.isArch32Bit())
Want = NeedsBiarchSuffix ? WANT64 : WANT32;
else if (IsX32)
Want = NeedsBiarchSuffix ? WANT64 : WANTX32;
else
Want = NeedsBiarchSuffix ? WANT32 : WANT64;
}
if (Want == WANT32)
Default.flag("+m32").flag("-m64").flag("-mx32");
else if (Want == WANT64)
Default.flag("-m32").flag("+m64").flag("-mx32");
else if (Want == WANTX32)
Default.flag("-m32").flag("-m64").flag("+mx32");
else
return false;
Result.Multilibs.push_back(Default);
Result.Multilibs.push_back(Alt64);
Result.Multilibs.push_back(Alt32);
Result.Multilibs.push_back(Altx32);
Result.Multilibs.FilterOut(NonExistent);
Multilib::flags_list Flags;
addMultilibFlag(TargetTriple.isArch64Bit() && !IsX32, "m64", Flags);
addMultilibFlag(TargetTriple.isArch32Bit(), "m32", Flags);
addMultilibFlag(TargetTriple.isArch64Bit() && IsX32, "mx32", Flags);
if (!Result.Multilibs.select(Flags, Result.SelectedMultilib))
return false;
if (Result.SelectedMultilib == Alt64 || Result.SelectedMultilib == Alt32 ||
Result.SelectedMultilib == Altx32)
Result.BiarchSibling = Default;
return true;
}
/// Generic_GCC - A tool chain using the 'gcc' command to perform
/// all subcommands; this relies on gcc translating the majority of
/// command line options.
/// Less-than for GCCVersion, implementing a Strict Weak Ordering.
bool Generic_GCC::GCCVersion::isOlderThan(int RHSMajor, int RHSMinor,
int RHSPatch,
StringRef RHSPatchSuffix) const {
if (Major != RHSMajor)
return Major < RHSMajor;
if (Minor != RHSMinor)
return Minor < RHSMinor;
if (Patch != RHSPatch) {
// Note that versions without a specified patch sort higher than those with
// a patch.
if (RHSPatch == -1)
return true;
if (Patch == -1)
return false;
// Otherwise just sort on the patch itself.
return Patch < RHSPatch;
}
if (PatchSuffix != RHSPatchSuffix) {
// Sort empty suffixes higher.
if (RHSPatchSuffix.empty())
return true;
if (PatchSuffix.empty())
return false;
// Provide a lexicographic sort to make this a total ordering.
return PatchSuffix < RHSPatchSuffix;
}
// The versions are equal.
return false;
}
/// Parse a GCCVersion object out of a string of text.
///
/// This is the primary means of forming GCCVersion objects.
/*static*/
Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) {
const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
std::pair<StringRef, StringRef> First = VersionText.split('.');
std::pair<StringRef, StringRef> Second = First.second.split('.');
GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""};
if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0)
return BadVersion;
GoodVersion.MajorStr = First.first.str();
if (First.second.empty())
return GoodVersion;
StringRef MinorStr = Second.first;
if (Second.second.empty()) {
if (size_t EndNumber = MinorStr.find_first_not_of("0123456789")) {
GoodVersion.PatchSuffix = std::string(MinorStr.substr(EndNumber));
MinorStr = MinorStr.slice(0, EndNumber);
}
}
if (MinorStr.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0)
return BadVersion;
GoodVersion.MinorStr = MinorStr.str();
// First look for a number prefix and parse that if present. Otherwise just
// stash the entire patch string in the suffix, and leave the number
// unspecified. This covers version strings such as:
// 5 (handled above)
// 4.4
// 4.4-patched
// 4.4.0
// 4.4.x
// 4.4.2-rc4
// 4.4.x-patched
// And retains any patch number it finds.
StringRef PatchText = Second.second;
if (!PatchText.empty()) {
if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) {
// Try to parse the number and any suffix.
if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) ||
GoodVersion.Patch < 0)
return BadVersion;
GoodVersion.PatchSuffix = std::string(PatchText.substr(EndNumber));
}
}
return GoodVersion;
}
static llvm::StringRef getGCCToolchainDir(const ArgList &Args,
llvm::StringRef SysRoot) {
const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_toolchain);
if (A)
return A->getValue();
// If we have a SysRoot, ignore GCC_INSTALL_PREFIX.
// GCC_INSTALL_PREFIX specifies the gcc installation for the default
// sysroot and is likely not valid with a different sysroot.
if (!SysRoot.empty())
return "";
return GCC_INSTALL_PREFIX;
}
/// Initialize a GCCInstallationDetector from the driver.
///
/// This performs all of the autodetection and sets up the various paths.
/// Once constructed, a GCCInstallationDetector is essentially immutable.
///
/// FIXME: We shouldn't need an explicit TargetTriple parameter here, and
/// should instead pull the target out of the driver. This is currently
/// necessary because the driver doesn't store the final version of the target
/// triple.
void Generic_GCC::GCCInstallationDetector::init(
const llvm::Triple &TargetTriple, const ArgList &Args,
ArrayRef<std::string> ExtraTripleAliases) {
llvm::Triple BiarchVariantTriple = TargetTriple.isArch32Bit()
? TargetTriple.get64BitArchVariant()
: TargetTriple.get32BitArchVariant();
// The library directories which may contain GCC installations.
SmallVector<StringRef, 4> CandidateLibDirs, CandidateBiarchLibDirs;
// The compatible GCC triples for this particular architecture.
SmallVector<StringRef, 16> CandidateTripleAliases;
SmallVector<StringRef, 16> CandidateBiarchTripleAliases;
CollectLibDirsAndTriples(TargetTriple, BiarchVariantTriple, CandidateLibDirs,
CandidateTripleAliases, CandidateBiarchLibDirs,
CandidateBiarchTripleAliases);
// Compute the set of prefixes for our search.
SmallVector<std::string, 8> Prefixes;
StringRef GCCToolchainDir = getGCCToolchainDir(Args, D.SysRoot);
if (GCCToolchainDir != "") {
if (GCCToolchainDir.back() == '/')
GCCToolchainDir = GCCToolchainDir.drop_back(); // remove the /
Prefixes.push_back(std::string(GCCToolchainDir));
} else {
// If we have a SysRoot, try that first.
if (!D.SysRoot.empty()) {
Prefixes.push_back(D.SysRoot);
AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot);
}
// Then look for gcc installed alongside clang.
Prefixes.push_back(D.InstalledDir + "/..");
// Next, look for prefix(es) that correspond to distribution-supplied gcc
// installations.
if (D.SysRoot.empty()) {
// Typically /usr.
AddDefaultGCCPrefixes(TargetTriple, Prefixes, D.SysRoot);
}
// Try to respect gcc-config on Gentoo if --gcc-toolchain is not provided.
// This avoids accidentally enforcing the system GCC version when using a
// custom toolchain.
SmallVector<StringRef, 16> GentooTestTriples;
// Try to match the exact target triple first.
// e.g. crossdev -S x86_64-gentoo-linux-gnu will install gcc libs for
// x86_64-gentoo-linux-gnu. But "clang -target x86_64-gentoo-linux-gnu"
// may pick the libraries for x86_64-pc-linux-gnu even when the exactly
// matching triple x86_64-gentoo-linux-gnu is present.
GentooTestTriples.push_back(TargetTriple.str());
// Check rest of triples.
GentooTestTriples.append(ExtraTripleAliases.begin(),
ExtraTripleAliases.end());
GentooTestTriples.append(CandidateTripleAliases.begin(),
CandidateTripleAliases.end());
if (ScanGentooConfigs(TargetTriple, Args, GentooTestTriples,
CandidateBiarchTripleAliases))
return;
}
// Loop over the various components which exist and select the best GCC
// installation available. GCC installs are ranked by version number.
const GCCVersion VersionZero = GCCVersion::Parse("0.0.0");
Version = VersionZero;
for (const std::string &Prefix : Prefixes) {
auto &VFS = D.getVFS();
if (!VFS.exists(Prefix))
continue;
for (StringRef Suffix : CandidateLibDirs) {
const std::string LibDir = concat(Prefix, Suffix);
if (!VFS.exists(LibDir))
continue;
// Maybe filter out <libdir>/gcc and <libdir>/gcc-cross.
bool GCCDirExists = VFS.exists(LibDir + "/gcc");
bool GCCCrossDirExists = VFS.exists(LibDir + "/gcc-cross");
// Try to match the exact target triple first.
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, TargetTriple.str(),
false, GCCDirExists, GCCCrossDirExists);
// Try rest of possible triples.
for (StringRef Candidate : ExtraTripleAliases) // Try these first.
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, false,
GCCDirExists, GCCCrossDirExists);
for (StringRef Candidate : CandidateTripleAliases)
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, false,
GCCDirExists, GCCCrossDirExists);
}
for (StringRef Suffix : CandidateBiarchLibDirs) {
const std::string LibDir = Prefix + Suffix.str();
if (!VFS.exists(LibDir))
continue;
bool GCCDirExists = VFS.exists(LibDir + "/gcc");
bool GCCCrossDirExists = VFS.exists(LibDir + "/gcc-cross");
for (StringRef Candidate : CandidateBiarchTripleAliases)
ScanLibDirForGCCTriple(TargetTriple, Args, LibDir, Candidate, true,
GCCDirExists, GCCCrossDirExists);
}
// Skip other prefixes once a GCC installation is found.
if (Version > VersionZero)
break;
}
}
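// Illustrative sketch (not part of the detection logic above; paths are
// hypothetical): on an x86_64 glibc host with no --sysroot and no
// --gcc-toolchain, the loops in init() end up probing directories of the form
//   <prefix><libdir>/gcc[-cross]/<triple>/<version>
// for example
//   /usr/lib64/gcc/x86_64-linux-gnu/12
//   /usr/lib/gcc/x86_64-redhat-linux/11
// and keep the candidate whose parsed GCCVersion compares highest within the
// first prefix that contains any valid installation.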
void Generic_GCC::GCCInstallationDetector::print(raw_ostream &OS) const {
for (const auto &InstallPath : CandidateGCCInstallPaths)
OS << "Found candidate GCC installation: " << InstallPath << "\n";
if (!GCCInstallPath.empty())
OS << "Selected GCC installation: " << GCCInstallPath << "\n";
for (const auto &Multilib : Multilibs)
OS << "Candidate multilib: " << Multilib << "\n";
if (Multilibs.size() != 0 || !SelectedMultilib.isDefault())
OS << "Selected multilib: " << SelectedMultilib << "\n";
}
bool Generic_GCC::GCCInstallationDetector::getBiarchSibling(Multilib &M) const {
if (BiarchSibling) {
M = BiarchSibling.value();
return true;
}
return false;
}
void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes(
const llvm::Triple &TargetTriple, SmallVectorImpl<std::string> &Prefixes,
StringRef SysRoot) {
if (TargetTriple.getOS() == llvm::Triple::Solaris) {
// Solaris is a special case.
// The GCC installation is under
// /usr/gcc/<major>.<minor>/lib/gcc/<triple>/<major>.<minor>.<patch>/
// so we need to find those /usr/gcc/*/lib/gcc libdirs and go with
// /usr/gcc/<version> as a prefix.
std::string PrefixDir = concat(SysRoot, "/usr/gcc");
std::error_code EC;
for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(PrefixDir, EC),
LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef VersionText = llvm::sys::path::filename(LI->path());
GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
// Filter out obviously bad entries.
if (CandidateVersion.Major == -1 || CandidateVersion.isOlderThan(4, 1, 1))
continue;
std::string CandidatePrefix = PrefixDir + "/" + VersionText.str();
std::string CandidateLibPath = CandidatePrefix + "/lib/gcc";
if (!D.getVFS().exists(CandidateLibPath))
continue;
Prefixes.push_back(CandidatePrefix);
}
return;
}
// For Linux, if --sysroot is not specified, look for RHEL/CentOS devtoolsets
// and gcc-toolsets.
if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux &&
D.getVFS().exists("/opt/rh")) {
- // Find the directory in /opt/rh/ starting with gcc-toolset-* or
- // devtoolset-* with the highest version number and add that
- // one to our prefixes.
- std::string ChosenToolsetDir;
- unsigned ChosenToolsetVersion = 0;
- std::error_code EC;
- for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin("/opt/rh", EC),
- LE;
- !EC && LI != LE; LI = LI.increment(EC)) {
- StringRef ToolsetDir = llvm::sys::path::filename(LI->path());
- unsigned ToolsetVersion;
- if ((!ToolsetDir.startswith("gcc-toolset-") &&
- !ToolsetDir.startswith("devtoolset-")) ||
- ToolsetDir.substr(ToolsetDir.rfind('-') + 1)
- .getAsInteger(10, ToolsetVersion))
- continue;
-
- if (ToolsetVersion > ChosenToolsetVersion) {
- ChosenToolsetVersion = ToolsetVersion;
- ChosenToolsetDir = "/opt/rh/" + ToolsetDir.str();
- }
- }
-
- if (ChosenToolsetVersion > 0)
- Prefixes.push_back(ChosenToolsetDir + "/root/usr");
+ // TODO: We may want to remove this, since the functionality
+ // can be achieved using config files.
+ Prefixes.push_back("/opt/rh/gcc-toolset-12/root/usr");
+ Prefixes.push_back("/opt/rh/gcc-toolset-11/root/usr");
+ Prefixes.push_back("/opt/rh/gcc-toolset-10/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-12/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-11/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-10/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-9/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-8/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-7/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-6/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-4/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-3/root/usr");
+ Prefixes.push_back("/opt/rh/devtoolset-2/root/usr");
}
// Fall back to /usr which is used by most non-Solaris systems.
Prefixes.push_back(concat(SysRoot, "/usr"));
}
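// Illustrative note (assumed host layout, not taken from this patch): for a
// plain x86_64 Linux invocation with an empty sysroot on a host that has
// /opt/rh, the function above appends the hard-coded
// /opt/rh/gcc-toolset-*/root/usr and /opt/rh/devtoolset-*/root/usr prefixes
// (newest first) and finally <sysroot>/usr, so init() will prefer a toolset
// GCC when one is installed, because it stops at the first prefix that yields
// a valid installation.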
/*static*/ void Generic_GCC::GCCInstallationDetector::CollectLibDirsAndTriples(
const llvm::Triple &TargetTriple, const llvm::Triple &BiarchTriple,
SmallVectorImpl<StringRef> &LibDirs,
SmallVectorImpl<StringRef> &TripleAliases,
SmallVectorImpl<StringRef> &BiarchLibDirs,
SmallVectorImpl<StringRef> &BiarchTripleAliases) {
// Declare a bunch of static data sets that we'll select between below. These
// are specifically designed to always refer to string literals to avoid any
// lifetime or initialization issues.
//
// The *Triples variables hard code some triples so that, for example,
// --target=aarch64 (incomplete triple) can detect lib/aarch64-linux-gnu.
// They are not needed when the user has correct LLVM_DEFAULT_TARGET_TRIPLE
// and always uses the full --target (e.g. --target=aarch64-linux-gnu). The
// lists should shrink over time. Please don't add more elements to *Triples.
static const char *const AArch64LibDirs[] = {"/lib64", "/lib"};
static const char *const AArch64Triples[] = {
"aarch64-none-linux-gnu", "aarch64-linux-gnu", "aarch64-redhat-linux",
"aarch64-suse-linux"};
static const char *const AArch64beLibDirs[] = {"/lib"};
static const char *const AArch64beTriples[] = {"aarch64_be-none-linux-gnu",
"aarch64_be-linux-gnu"};
static const char *const ARMLibDirs[] = {"/lib"};
static const char *const ARMTriples[] = {"arm-linux-gnueabi"};
static const char *const ARMHFTriples[] = {"arm-linux-gnueabihf",
"armv7hl-redhat-linux-gnueabi",
"armv6hl-suse-linux-gnueabi",
"armv7hl-suse-linux-gnueabi"};
static const char *const ARMebLibDirs[] = {"/lib"};
static const char *const ARMebTriples[] = {"armeb-linux-gnueabi"};
static const char *const ARMebHFTriples[] = {
"armeb-linux-gnueabihf", "armebv7hl-redhat-linux-gnueabi"};
static const char *const AVRLibDirs[] = {"/lib"};
static const char *const AVRTriples[] = {"avr"};
static const char *const CSKYLibDirs[] = {"/lib"};
static const char *const CSKYTriples[] = {
"csky-linux-gnuabiv2", "csky-linux-uclibcabiv2", "csky-elf-noneabiv2"};
static const char *const X86_64LibDirs[] = {"/lib64", "/lib"};
static const char *const X86_64Triples[] = {
"x86_64-linux-gnu", "x86_64-unknown-linux-gnu",
"x86_64-pc-linux-gnu", "x86_64-redhat-linux6E",
"x86_64-redhat-linux", "x86_64-suse-linux",
"x86_64-manbo-linux-gnu", "x86_64-linux-gnu",
"x86_64-slackware-linux", "x86_64-unknown-linux",
"x86_64-amazon-linux"};
static const char *const X32Triples[] = {"x86_64-linux-gnux32",
"x86_64-pc-linux-gnux32"};
static const char *const X32LibDirs[] = {"/libx32", "/lib"};
static const char *const X86LibDirs[] = {"/lib32", "/lib"};
static const char *const X86Triples[] = {
"i586-linux-gnu", "i686-linux-gnu", "i686-pc-linux-gnu",
"i386-redhat-linux6E", "i686-redhat-linux", "i386-redhat-linux",
"i586-suse-linux", "i686-montavista-linux", "i686-gnu",
};
static const char *const M68kLibDirs[] = {"/lib"};
static const char *const M68kTriples[] = {
"m68k-linux-gnu", "m68k-unknown-linux-gnu", "m68k-suse-linux"};
static const char *const MIPSLibDirs[] = {"/libo32", "/lib"};
static const char *const MIPSTriples[] = {
"mips-linux-gnu", "mips-mti-linux", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mipsisa32r6-linux-gnu"};
static const char *const MIPSELLibDirs[] = {"/libo32", "/lib"};
static const char *const MIPSELTriples[] = {
"mipsel-linux-gnu", "mips-img-linux-gnu", "mipsisa32r6el-linux-gnu"};
static const char *const MIPS64LibDirs[] = {"/lib64", "/lib"};
static const char *const MIPS64Triples[] = {
"mips64-linux-gnu", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mips64-linux-gnuabi64",
"mipsisa64r6-linux-gnu", "mipsisa64r6-linux-gnuabi64"};
static const char *const MIPS64ELLibDirs[] = {"/lib64", "/lib"};
static const char *const MIPS64ELTriples[] = {
"mips64el-linux-gnu", "mips-mti-linux-gnu",
"mips-img-linux-gnu", "mips64el-linux-gnuabi64",
"mipsisa64r6el-linux-gnu", "mipsisa64r6el-linux-gnuabi64"};
static const char *const MIPSN32LibDirs[] = {"/lib32"};
static const char *const MIPSN32Triples[] = {"mips64-linux-gnuabin32",
"mipsisa64r6-linux-gnuabin32"};
static const char *const MIPSN32ELLibDirs[] = {"/lib32"};
static const char *const MIPSN32ELTriples[] = {
"mips64el-linux-gnuabin32", "mipsisa64r6el-linux-gnuabin32"};
static const char *const MSP430LibDirs[] = {"/lib"};
static const char *const MSP430Triples[] = {"msp430-elf"};
static const char *const PPCLibDirs[] = {"/lib32", "/lib"};
static const char *const PPCTriples[] = {
"powerpc-linux-gnu", "powerpc-unknown-linux-gnu", "powerpc-linux-gnuspe",
// On 32-bit PowerPC systems running SUSE Linux, gcc is configured as a
// 64-bit compiler which defaults to "-m32", hence "powerpc64-suse-linux".
"powerpc64-suse-linux", "powerpc-montavista-linuxspe"};
static const char *const PPCLELibDirs[] = {"/lib32", "/lib"};
static const char *const PPCLETriples[] = {"powerpcle-linux-gnu",
"powerpcle-unknown-linux-gnu",
"powerpcle-linux-musl"};
static const char *const PPC64LibDirs[] = {"/lib64", "/lib"};
static const char *const PPC64Triples[] = {
"powerpc64-linux-gnu", "powerpc64-unknown-linux-gnu",
"powerpc64-suse-linux", "ppc64-redhat-linux"};
static const char *const PPC64LELibDirs[] = {"/lib64", "/lib"};
static const char *const PPC64LETriples[] = {
"powerpc64le-linux-gnu", "powerpc64le-unknown-linux-gnu",
"powerpc64le-none-linux-gnu", "powerpc64le-suse-linux",
"ppc64le-redhat-linux"};
static const char *const RISCV32LibDirs[] = {"/lib32", "/lib"};
static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu",
"riscv32-linux-gnu",
"riscv32-unknown-elf"};
static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"};
static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu",
"riscv64-linux-gnu",
"riscv64-unknown-elf"};
static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"};
static const char *const SPARCv8Triples[] = {"sparc-linux-gnu",
"sparcv8-linux-gnu"};
static const char *const SPARCv9LibDirs[] = {"/lib64", "/lib"};
static const char *const SPARCv9Triples[] = {"sparc64-linux-gnu",
"sparcv9-linux-gnu"};
static const char *const SystemZLibDirs[] = {"/lib64", "/lib"};
static const char *const SystemZTriples[] = {
"s390x-linux-gnu", "s390x-unknown-linux-gnu", "s390x-ibm-linux-gnu",
"s390x-suse-linux", "s390x-redhat-linux"};
using std::begin;
using std::end;
if (TargetTriple.getOS() == llvm::Triple::Solaris) {
static const char *const SolarisLibDirs[] = {"/lib"};
static const char *const SolarisSparcV8Triples[] = {
"sparc-sun-solaris2.11", "sparc-sun-solaris2.12"};
static const char *const SolarisSparcV9Triples[] = {
"sparcv9-sun-solaris2.11", "sparcv9-sun-solaris2.12"};
static const char *const SolarisX86Triples[] = {"i386-pc-solaris2.11",
"i386-pc-solaris2.12"};
static const char *const SolarisX86_64Triples[] = {"x86_64-pc-solaris2.11",
"x86_64-pc-solaris2.12"};
LibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs));
BiarchLibDirs.append(begin(SolarisLibDirs), end(SolarisLibDirs));
switch (TargetTriple.getArch()) {
case llvm::Triple::x86:
TripleAliases.append(begin(SolarisX86Triples), end(SolarisX86Triples));
BiarchTripleAliases.append(begin(SolarisX86_64Triples),
end(SolarisX86_64Triples));
break;
case llvm::Triple::x86_64:
TripleAliases.append(begin(SolarisX86_64Triples),
end(SolarisX86_64Triples));
BiarchTripleAliases.append(begin(SolarisX86Triples),
end(SolarisX86Triples));
break;
case llvm::Triple::sparc:
TripleAliases.append(begin(SolarisSparcV8Triples),
end(SolarisSparcV8Triples));
BiarchTripleAliases.append(begin(SolarisSparcV9Triples),
end(SolarisSparcV9Triples));
break;
case llvm::Triple::sparcv9:
TripleAliases.append(begin(SolarisSparcV9Triples),
end(SolarisSparcV9Triples));
BiarchTripleAliases.append(begin(SolarisSparcV8Triples),
end(SolarisSparcV8Triples));
break;
default:
break;
}
return;
}
// Android targets should not use GNU/Linux tools or libraries.
if (TargetTriple.isAndroid()) {
static const char *const AArch64AndroidTriples[] = {
"aarch64-linux-android"};
static const char *const ARMAndroidTriples[] = {"arm-linux-androideabi"};
static const char *const MIPSELAndroidTriples[] = {"mipsel-linux-android"};
static const char *const MIPS64ELAndroidTriples[] = {
"mips64el-linux-android"};
static const char *const X86AndroidTriples[] = {"i686-linux-android"};
static const char *const X86_64AndroidTriples[] = {"x86_64-linux-android"};
switch (TargetTriple.getArch()) {
case llvm::Triple::aarch64:
LibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs));
TripleAliases.append(begin(AArch64AndroidTriples),
end(AArch64AndroidTriples));
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs));
TripleAliases.append(begin(ARMAndroidTriples), end(ARMAndroidTriples));
break;
case llvm::Triple::mipsel:
LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
TripleAliases.append(begin(MIPSELAndroidTriples),
end(MIPSELAndroidTriples));
BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
BiarchTripleAliases.append(begin(MIPS64ELAndroidTriples),
end(MIPS64ELAndroidTriples));
break;
case llvm::Triple::mips64el:
LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
TripleAliases.append(begin(MIPS64ELAndroidTriples),
end(MIPS64ELAndroidTriples));
BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
BiarchTripleAliases.append(begin(MIPSELAndroidTriples),
end(MIPSELAndroidTriples));
break;
case llvm::Triple::x86_64:
LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
TripleAliases.append(begin(X86_64AndroidTriples),
end(X86_64AndroidTriples));
BiarchLibDirs.append(begin(X86LibDirs), end(X86LibDirs));
BiarchTripleAliases.append(begin(X86AndroidTriples),
end(X86AndroidTriples));
break;
case llvm::Triple::x86:
LibDirs.append(begin(X86LibDirs), end(X86LibDirs));
TripleAliases.append(begin(X86AndroidTriples), end(X86AndroidTriples));
BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
BiarchTripleAliases.append(begin(X86_64AndroidTriples),
end(X86_64AndroidTriples));
break;
default:
break;
}
return;
}
switch (TargetTriple.getArch()) {
case llvm::Triple::aarch64:
LibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs));
TripleAliases.append(begin(AArch64Triples), end(AArch64Triples));
BiarchLibDirs.append(begin(AArch64LibDirs), end(AArch64LibDirs));
BiarchTripleAliases.append(begin(AArch64Triples), end(AArch64Triples));
break;
case llvm::Triple::aarch64_be:
LibDirs.append(begin(AArch64beLibDirs), end(AArch64beLibDirs));
TripleAliases.append(begin(AArch64beTriples), end(AArch64beTriples));
BiarchLibDirs.append(begin(AArch64beLibDirs), end(AArch64beLibDirs));
BiarchTripleAliases.append(begin(AArch64beTriples), end(AArch64beTriples));
break;
case llvm::Triple::arm:
case llvm::Triple::thumb:
LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs));
if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
TripleAliases.append(begin(ARMHFTriples), end(ARMHFTriples));
} else {
TripleAliases.append(begin(ARMTriples), end(ARMTriples));
}
break;
case llvm::Triple::armeb:
case llvm::Triple::thumbeb:
LibDirs.append(begin(ARMebLibDirs), end(ARMebLibDirs));
if (TargetTriple.getEnvironment() == llvm::Triple::GNUEABIHF) {
TripleAliases.append(begin(ARMebHFTriples), end(ARMebHFTriples));
} else {
TripleAliases.append(begin(ARMebTriples), end(ARMebTriples));
}
break;
case llvm::Triple::avr:
LibDirs.append(begin(AVRLibDirs), end(AVRLibDirs));
TripleAliases.append(begin(AVRTriples), end(AVRTriples));
break;
case llvm::Triple::csky:
LibDirs.append(begin(CSKYLibDirs), end(CSKYLibDirs));
TripleAliases.append(begin(CSKYTriples), end(CSKYTriples));
break;
case llvm::Triple::x86_64:
if (TargetTriple.isX32()) {
LibDirs.append(begin(X32LibDirs), end(X32LibDirs));
TripleAliases.append(begin(X32Triples), end(X32Triples));
BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
} else {
LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
TripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
BiarchLibDirs.append(begin(X32LibDirs), end(X32LibDirs));
BiarchTripleAliases.append(begin(X32Triples), end(X32Triples));
}
BiarchLibDirs.append(begin(X86LibDirs), end(X86LibDirs));
BiarchTripleAliases.append(begin(X86Triples), end(X86Triples));
break;
case llvm::Triple::x86:
LibDirs.append(begin(X86LibDirs), end(X86LibDirs));
// MCU toolchain is 32 bit only and its triple alias is TargetTriple
// itself, which will be appended below.
if (!TargetTriple.isOSIAMCU()) {
TripleAliases.append(begin(X86Triples), end(X86Triples));
BiarchLibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs));
BiarchTripleAliases.append(begin(X86_64Triples), end(X86_64Triples));
BiarchLibDirs.append(begin(X32LibDirs), end(X32LibDirs));
BiarchTripleAliases.append(begin(X32Triples), end(X32Triples));
}
break;
case llvm::Triple::m68k:
LibDirs.append(begin(M68kLibDirs), end(M68kLibDirs));
TripleAliases.append(begin(M68kTriples), end(M68kTriples));
break;
case llvm::Triple::mips:
LibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs));
TripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
BiarchLibDirs.append(begin(MIPS64LibDirs), end(MIPS64LibDirs));
BiarchTripleAliases.append(begin(MIPS64Triples), end(MIPS64Triples));
BiarchLibDirs.append(begin(MIPSN32LibDirs), end(MIPSN32LibDirs));
BiarchTripleAliases.append(begin(MIPSN32Triples), end(MIPSN32Triples));
break;
case llvm::Triple::mipsel:
LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
TripleAliases.append(begin(MIPSELTriples), end(MIPSELTriples));
TripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
BiarchTripleAliases.append(begin(MIPS64ELTriples), end(MIPS64ELTriples));
BiarchLibDirs.append(begin(MIPSN32ELLibDirs), end(MIPSN32ELLibDirs));
BiarchTripleAliases.append(begin(MIPSN32ELTriples), end(MIPSN32ELTriples));
break;
case llvm::Triple::mips64:
LibDirs.append(begin(MIPS64LibDirs), end(MIPS64LibDirs));
TripleAliases.append(begin(MIPS64Triples), end(MIPS64Triples));
BiarchLibDirs.append(begin(MIPSLibDirs), end(MIPSLibDirs));
BiarchTripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
BiarchLibDirs.append(begin(MIPSN32LibDirs), end(MIPSN32LibDirs));
BiarchTripleAliases.append(begin(MIPSN32Triples), end(MIPSN32Triples));
break;
case llvm::Triple::mips64el:
LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs));
TripleAliases.append(begin(MIPS64ELTriples), end(MIPS64ELTriples));
BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs));
BiarchTripleAliases.append(begin(MIPSELTriples), end(MIPSELTriples));
BiarchLibDirs.append(begin(MIPSN32ELLibDirs), end(MIPSN32ELLibDirs));
BiarchTripleAliases.append(begin(MIPSN32ELTriples), end(MIPSN32ELTriples));
BiarchTripleAliases.append(begin(MIPSTriples), end(MIPSTriples));
break;
case llvm::Triple::msp430:
LibDirs.append(begin(MSP430LibDirs), end(MSP430LibDirs));
TripleAliases.append(begin(MSP430Triples), end(MSP430Triples));
break;
case llvm::Triple::ppc:
LibDirs.append(begin(PPCLibDirs), end(PPCLibDirs));
TripleAliases.append(begin(PPCTriples), end(PPCTriples));
BiarchLibDirs.append(begin(PPC64LibDirs), end(PPC64LibDirs));
BiarchTripleAliases.append(begin(PPC64Triples), end(PPC64Triples));
break;
case llvm::Triple::ppcle:
LibDirs.append(begin(PPCLELibDirs), end(PPCLELibDirs));
TripleAliases.append(begin(PPCLETriples), end(PPCLETriples));
BiarchLibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs));
BiarchTripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples));
break;
case llvm::Triple::ppc64:
LibDirs.append(begin(PPC64LibDirs), end(PPC64LibDirs));
TripleAliases.append(begin(PPC64Triples), end(PPC64Triples));
BiarchLibDirs.append(begin(PPCLibDirs), end(PPCLibDirs));
BiarchTripleAliases.append(begin(PPCTriples), end(PPCTriples));
break;
case llvm::Triple::ppc64le:
LibDirs.append(begin(PPC64LELibDirs), end(PPC64LELibDirs));
TripleAliases.append(begin(PPC64LETriples), end(PPC64LETriples));
BiarchLibDirs.append(begin(PPCLELibDirs), end(PPCLELibDirs));
BiarchTripleAliases.append(begin(PPCLETriples), end(PPCLETriples));
break;
case llvm::Triple::riscv32:
LibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs));
TripleAliases.append(begin(RISCV32Triples), end(RISCV32Triples));
BiarchLibDirs.append(begin(RISCV64LibDirs), end(RISCV64LibDirs));
BiarchTripleAliases.append(begin(RISCV64Triples), end(RISCV64Triples));
break;
case llvm::Triple::riscv64:
LibDirs.append(begin(RISCV64LibDirs), end(RISCV64LibDirs));
TripleAliases.append(begin(RISCV64Triples), end(RISCV64Triples));
BiarchLibDirs.append(begin(RISCV32LibDirs), end(RISCV32LibDirs));
BiarchTripleAliases.append(begin(RISCV32Triples), end(RISCV32Triples));
break;
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
LibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs));
TripleAliases.append(begin(SPARCv8Triples), end(SPARCv8Triples));
BiarchLibDirs.append(begin(SPARCv9LibDirs), end(SPARCv9LibDirs));
BiarchTripleAliases.append(begin(SPARCv9Triples), end(SPARCv9Triples));
break;
case llvm::Triple::sparcv9:
LibDirs.append(begin(SPARCv9LibDirs), end(SPARCv9LibDirs));
TripleAliases.append(begin(SPARCv9Triples), end(SPARCv9Triples));
BiarchLibDirs.append(begin(SPARCv8LibDirs), end(SPARCv8LibDirs));
BiarchTripleAliases.append(begin(SPARCv8Triples), end(SPARCv8Triples));
break;
case llvm::Triple::systemz:
LibDirs.append(begin(SystemZLibDirs), end(SystemZLibDirs));
TripleAliases.append(begin(SystemZTriples), end(SystemZTriples));
break;
default:
// By default, just rely on the standard lib directories and the original
// triple.
break;
}
// Always append the driver's target triple to the end, in case it doesn't
// match any of our aliases.
TripleAliases.push_back(TargetTriple.str());
// Also include the multiarch variant if it's different.
if (TargetTriple.str() != BiarchTriple.str())
BiarchTripleAliases.push_back(BiarchTriple.str());
}
bool Generic_GCC::GCCInstallationDetector::ScanGCCForMultilibs(
const llvm::Triple &TargetTriple, const ArgList &Args,
StringRef Path, bool NeedsBiarchSuffix) {
llvm::Triple::ArchType TargetArch = TargetTriple.getArch();
DetectedMultilibs Detected;
// An Android standalone toolchain could have multilibs for ARM and Thumb.
// Debian MIPS multilibs behave more like the rest of the biarch ones, so
// handle them there.
if (isArmOrThumbArch(TargetArch) && TargetTriple.isAndroid()) {
// It should also work without multilibs in a simplified toolchain.
findAndroidArmMultilibs(D, TargetTriple, Path, Args, Detected);
} else if (TargetTriple.isCSKY()) {
findCSKYMultilibs(D, TargetTriple, Path, Args, Detected);
} else if (TargetTriple.isMIPS()) {
if (!findMIPSMultilibs(D, TargetTriple, Path, Args, Detected))
return false;
} else if (TargetTriple.isRISCV()) {
findRISCVMultilibs(D, TargetTriple, Path, Args, Detected);
} else if (isMSP430(TargetArch)) {
findMSP430Multilibs(D, TargetTriple, Path, Args, Detected);
} else if (TargetArch == llvm::Triple::avr) {
// AVR has no multilibs.
} else if (!findBiarchMultilibs(D, TargetTriple, Path, Args,
NeedsBiarchSuffix, Detected)) {
return false;
}
Multilibs = Detected.Multilibs;
SelectedMultilib = Detected.SelectedMultilib;
BiarchSibling = Detected.BiarchSibling;
return true;
}
void Generic_GCC::GCCInstallationDetector::ScanLibDirForGCCTriple(
const llvm::Triple &TargetTriple, const ArgList &Args,
const std::string &LibDir, StringRef CandidateTriple,
bool NeedsBiarchSuffix, bool GCCDirExists, bool GCCCrossDirExists) {
// Locations relative to the system lib directory where GCC's triple-specific
// directories might reside.
struct GCCLibSuffix {
// Path from system lib directory to GCC triple-specific directory.
std::string LibSuffix;
// Path from GCC triple-specific directory back to system lib directory.
// This is one '..' component per component in LibSuffix.
StringRef ReversePath;
// Whether this library suffix is relevant for the triple.
bool Active;
} Suffixes[] = {
// This is the normal place.
{"gcc/" + CandidateTriple.str(), "../..", GCCDirExists},
// Debian puts cross-compilers in gcc-cross.
{"gcc-cross/" + CandidateTriple.str(), "../..", GCCCrossDirExists},
// The Freescale PPC SDK has the gcc libraries in
// <sysroot>/usr/lib/<triple>/x.y.z so have a look there as well. Only do
// this on Freescale triples, though, since some systems put a *lot* of
// files in that location, not just GCC installation data.
{CandidateTriple.str(), "..",
TargetTriple.getVendor() == llvm::Triple::Freescale ||
TargetTriple.getVendor() == llvm::Triple::OpenEmbedded}};
for (auto &Suffix : Suffixes) {
if (!Suffix.Active)
continue;
StringRef LibSuffix = Suffix.LibSuffix;
std::error_code EC;
for (llvm::vfs::directory_iterator
LI = D.getVFS().dir_begin(LibDir + "/" + LibSuffix, EC),
LE;
!EC && LI != LE; LI = LI.increment(EC)) {
StringRef VersionText = llvm::sys::path::filename(LI->path());
GCCVersion CandidateVersion = GCCVersion::Parse(VersionText);
if (CandidateVersion.Major != -1) // Filter obviously bad entries.
if (!CandidateGCCInstallPaths.insert(std::string(LI->path())).second)
continue; // Saw this path before; no need to look at it again.
if (CandidateVersion.isOlderThan(4, 1, 1))
continue;
if (CandidateVersion <= Version)
continue;
if (!ScanGCCForMultilibs(TargetTriple, Args, LI->path(),
NeedsBiarchSuffix))
continue;
Version = CandidateVersion;
GCCTriple.setTriple(CandidateTriple);
// FIXME: We hack together the directory name here instead of
// using LI to ensure stable path separators across Windows and
// Linux.
GCCInstallPath = (LibDir + "/" + LibSuffix + "/" + VersionText).str();
GCCParentLibPath = (GCCInstallPath + "/../" + Suffix.ReversePath).str();
IsValid = true;
}
}
}
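// Illustrative sketch (hypothetical values, for orientation only): with
//   LibDir      = "/usr/lib"
//   LibSuffix   = "gcc/x86_64-linux-gnu"
//   VersionText = "12"
//   Suffix.ReversePath = "../.."
// the assignments above produce
//   GCCInstallPath   = "/usr/lib/gcc/x86_64-linux-gnu/12"
//   GCCParentLibPath = "/usr/lib/gcc/x86_64-linux-gnu/12/../../.." (i.e. /usr/lib)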
bool Generic_GCC::GCCInstallationDetector::ScanGentooConfigs(
const llvm::Triple &TargetTriple, const ArgList &Args,
const SmallVectorImpl<StringRef> &CandidateTriples,
const SmallVectorImpl<StringRef> &CandidateBiarchTriples) {
if (!D.getVFS().exists(concat(D.SysRoot, GentooConfigDir)))
return false;
for (StringRef CandidateTriple : CandidateTriples) {
if (ScanGentooGccConfig(TargetTriple, Args, CandidateTriple))
return true;
}
for (StringRef CandidateTriple : CandidateBiarchTriples) {
if (ScanGentooGccConfig(TargetTriple, Args, CandidateTriple, true))
return true;
}
return false;
}
bool Generic_GCC::GCCInstallationDetector::ScanGentooGccConfig(
const llvm::Triple &TargetTriple, const ArgList &Args,
StringRef CandidateTriple, bool NeedsBiarchSuffix) {
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> File =
D.getVFS().getBufferForFile(concat(D.SysRoot, GentooConfigDir,
"/config-" + CandidateTriple.str()));
if (File) {
SmallVector<StringRef, 2> Lines;
File.get()->getBuffer().split(Lines, "\n");
for (StringRef Line : Lines) {
Line = Line.trim();
// CURRENT=triple-version
if (!Line.consume_front("CURRENT="))
continue;
// Process the config file pointed to by CURRENT.
llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> ConfigFile =
D.getVFS().getBufferForFile(
concat(D.SysRoot, GentooConfigDir, "/" + Line));
std::pair<StringRef, StringRef> ActiveVersion = Line.rsplit('-');
// List of paths to scan for libraries.
SmallVector<StringRef, 4> GentooScanPaths;
// Scan the config file to find the installed GCC library paths.
// Typical content of the GCC config file:
// LDPATH="/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x:/usr/lib/gcc/
// (continued from previous line) x86_64-pc-linux-gnu/4.9.x/32"
// MANPATH="/usr/share/gcc-data/x86_64-pc-linux-gnu/4.9.x/man"
// INFOPATH="/usr/share/gcc-data/x86_64-pc-linux-gnu/4.9.x/info"
// STDCXX_INCDIR="/usr/lib/gcc/x86_64-pc-linux-gnu/4.9.x/include/g++-v4"
// We are looking for the paths listed in LDPATH=... .
if (ConfigFile) {
SmallVector<StringRef, 2> ConfigLines;
ConfigFile.get()->getBuffer().split(ConfigLines, "\n");
for (StringRef ConfLine : ConfigLines) {
ConfLine = ConfLine.trim();
if (ConfLine.consume_front("LDPATH=")) {
// Drop '"' from front and back if present.
ConfLine.consume_back("\"");
ConfLine.consume_front("\"");
// Get all paths separated by ':'.
ConfLine.split(GentooScanPaths, ':', -1, /*AllowEmpty*/ false);
}
}
}
// Test the path based on the version in /etc/env.d/gcc/config-{tuple}.
std::string basePath = "/usr/lib/gcc/" + ActiveVersion.first.str() + "/"
+ ActiveVersion.second.str();
GentooScanPaths.push_back(StringRef(basePath));
// Scan all paths for GCC libraries.
for (const auto &GentooScanPath : GentooScanPaths) {
std::string GentooPath = concat(D.SysRoot, GentooScanPath);
if (D.getVFS().exists(GentooPath + "/crtbegin.o")) {
if (!ScanGCCForMultilibs(TargetTriple, Args, GentooPath,
NeedsBiarchSuffix))
continue;
Version = GCCVersion::Parse(ActiveVersion.second);
GCCInstallPath = GentooPath;
GCCParentLibPath = GentooPath + std::string("/../../..");
GCCTriple.setTriple(ActiveVersion.first);
IsValid = true;
return true;
}
}
}
}
return false;
}
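// Illustrative sketch (assumed file contents, not from this patch): given a
// Gentoo config file <sysroot>/etc/env.d/gcc/config-x86_64-pc-linux-gnu with
//   CURRENT=x86_64-pc-linux-gnu-12
// the code above reads <sysroot>/etc/env.d/gcc/x86_64-pc-linux-gnu-12,
// collects the LDPATH entries plus /usr/lib/gcc/x86_64-pc-linux-gnu/12, and
// accepts the first of those directories that contains a crtbegin.o.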
Generic_GCC::Generic_GCC(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
: ToolChain(D, Triple, Args), GCCInstallation(D),
CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) {
getProgramPaths().push_back(getDriver().getInstalledDir());
if (getDriver().getInstalledDir() != getDriver().Dir)
getProgramPaths().push_back(getDriver().Dir);
}
Generic_GCC::~Generic_GCC() {}
Tool *Generic_GCC::getTool(Action::ActionClass AC) const {
switch (AC) {
case Action::PreprocessJobClass:
if (!Preprocess)
Preprocess.reset(new clang::driver::tools::gcc::Preprocessor(*this));
return Preprocess.get();
case Action::CompileJobClass:
if (!Compile)
Compile.reset(new tools::gcc::Compiler(*this));
return Compile.get();
default:
return ToolChain::getTool(AC);
}
}
Tool *Generic_GCC::buildAssembler() const {
return new tools::gnutools::Assembler(*this);
}
Tool *Generic_GCC::buildLinker() const { return new tools::gcc::Linker(*this); }
void Generic_GCC::printVerboseInfo(raw_ostream &OS) const {
// Print the information about how we detected the GCC installation.
GCCInstallation.print(OS);
CudaInstallation.print(OS);
RocmInstallation.print(OS);
}
bool Generic_GCC::IsUnwindTablesDefault(const ArgList &Args) const {
switch (getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return true;
default:
return false;
}
}
bool Generic_GCC::isPICDefault() const {
switch (getArch()) {
case llvm::Triple::x86_64:
return getTriple().isOSWindows();
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
return true;
default:
return false;
}
}
bool Generic_GCC::isPIEDefault(const llvm::opt::ArgList &Args) const {
return false;
}
bool Generic_GCC::isPICDefaultForced() const {
return getArch() == llvm::Triple::x86_64 && getTriple().isOSWindows();
}
bool Generic_GCC::IsIntegratedAssemblerDefault() const {
switch (getTriple().getArch()) {
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::avr:
case llvm::Triple::bpfel:
case llvm::Triple::bpfeb:
case llvm::Triple::csky:
case llvm::Triple::hexagon:
case llvm::Triple::lanai:
case llvm::Triple::m68k:
case llvm::Triple::mips:
case llvm::Triple::mipsel:
case llvm::Triple::mips64:
case llvm::Triple::mips64el:
case llvm::Triple::msp430:
case llvm::Triple::ppc:
case llvm::Triple::ppcle:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
case llvm::Triple::riscv32:
case llvm::Triple::riscv64:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::sparcv9:
case llvm::Triple::systemz:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::ve:
case llvm::Triple::x86:
case llvm::Triple::x86_64:
return true;
default:
return false;
}
}
void Generic_GCC::PushPPaths(ToolChain::path_list &PPaths) {
// Cross-compiling binutils and GCC installations (vanilla and openSUSE at
// least) put various tools in a triple-prefixed directory off of the parent
// of the GCC installation. We use the GCC triple here to ensure that we end
// up with tools that support the same amount of cross compiling as the
// detected GCC installation. For example, if we find a GCC installation
// targeting x86_64, but it is a bi-arch GCC installation, it can also be
// used to target i386.
if (GCCInstallation.isValid()) {
PPaths.push_back(Twine(GCCInstallation.getParentLibPath() + "/../" +
GCCInstallation.getTriple().str() + "/bin")
.str());
}
}
void Generic_GCC::AddMultilibPaths(const Driver &D,
const std::string &SysRoot,
const std::string &OSLibDir,
const std::string &MultiarchTriple,
path_list &Paths) {
// Add the multilib suffixed paths where they are available.
if (GCCInstallation.isValid()) {
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
const std::string &LibPath =
std::string(GCCInstallation.getParentLibPath());
// Sourcery CodeBench MIPS toolchain holds some libraries under
// a biarch-like suffix of the GCC installation.
if (const auto &PathsCallback = Multilibs.filePathsCallback())
for (const auto &Path : PathsCallback(SelectedMultilib))
addPathIfExists(D, GCCInstallation.getInstallPath() + Path, Paths);
// Add lib/gcc/$triple/$version, with an optional /multilib suffix.
addPathIfExists(
D, GCCInstallation.getInstallPath() + SelectedMultilib.gccSuffix(),
Paths);
// Add lib/gcc/$triple/$libdir
// For GCC built with --enable-version-specific-runtime-libs.
addPathIfExists(D, GCCInstallation.getInstallPath() + "/../" + OSLibDir,
Paths);
// GCC cross compiling toolchains will install target libraries which ship
// as part of the toolchain under <prefix>/<triple>/<libdir> rather than as
// any part of the GCC installation in
// <prefix>/<libdir>/gcc/<triple>/<version>. This decision is somewhat
// debatable, but is the reality today. We need to search this tree even
// when we have a sysroot somewhere else. It is the responsibility of
// whoever is doing the cross build targeting a sysroot using a GCC
// installation that is *not* within the system root to ensure two things:
//
// 1) Any DSOs that are linked in from this tree or from the install path
// above must be present on the system root and found via an
// appropriate rpath.
// 2) There must not be libraries installed into
// <prefix>/<triple>/<libdir> unless they should be preferred over
// those within the system root.
//
// Note that this matches the GCC behavior. See the below comment for where
// Clang diverges from GCC's behavior.
addPathIfExists(D,
LibPath + "/../" + GCCTriple.str() + "/lib/../" + OSLibDir +
SelectedMultilib.osSuffix(),
Paths);
// If the GCC installation we found is inside of the sysroot, we want to
// prefer libraries installed in the parent prefix of the GCC installation.
// It is important to *not* use these paths when the GCC installation is
// outside of the system root as that can pick up unintended libraries.
// This usually happens when there is an external cross compiler on the
// host system, and a more minimal sysroot available that is the target of
// the cross. Note that GCC does include some of these directories in some
// configurations but this seems somewhere between questionable and simply
// a bug.
if (StringRef(LibPath).startswith(SysRoot))
addPathIfExists(D, LibPath + "/../" + OSLibDir, Paths);
}
}
void Generic_GCC::AddMultiarchPaths(const Driver &D,
const std::string &SysRoot,
const std::string &OSLibDir,
path_list &Paths) {
if (GCCInstallation.isValid()) {
const std::string &LibPath =
std::string(GCCInstallation.getParentLibPath());
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
const Multilib &Multilib = GCCInstallation.getMultilib();
addPathIfExists(
D, LibPath + "/../" + GCCTriple.str() + "/lib" + Multilib.osSuffix(),
Paths);
}
}
void Generic_GCC::AddMultilibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
// Add include directories specific to the selected multilib set and multilib.
if (!GCCInstallation.isValid())
return;
// gcc TOOL_INCLUDE_DIR.
const llvm::Triple &GCCTriple = GCCInstallation.getTriple();
std::string LibPath(GCCInstallation.getParentLibPath());
addSystemInclude(DriverArgs, CC1Args,
Twine(LibPath) + "/../" + GCCTriple.str() + "/include");
const auto &Callback = Multilibs.includeDirsCallback();
if (Callback) {
for (const auto &Path : Callback(GCCInstallation.getMultilib()))
addExternCSystemIncludeIfExists(DriverArgs, CC1Args,
GCCInstallation.getInstallPath() + Path);
}
}
void Generic_GCC::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
if (DriverArgs.hasArg(options::OPT_nostdinc, options::OPT_nostdincxx,
options::OPT_nostdlibinc))
return;
switch (GetCXXStdlibType(DriverArgs)) {
case ToolChain::CST_Libcxx:
addLibCxxIncludePaths(DriverArgs, CC1Args);
break;
case ToolChain::CST_Libstdcxx:
addLibStdCxxIncludePaths(DriverArgs, CC1Args);
break;
}
}
void
Generic_GCC::addLibCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
const Driver &D = getDriver();
std::string SysRoot = computeSysRoot();
std::string Target = getTripleString();
auto AddIncludePath = [&](std::string Path) {
std::string Version = detectLibcxxVersion(Path);
if (Version.empty())
return false;
// First add the per-target include path if it exists.
std::string TargetDir = Path + "/" + Target + "/c++/" + Version;
if (D.getVFS().exists(TargetDir))
addSystemInclude(DriverArgs, CC1Args, TargetDir);
// Second add the generic one.
addSystemInclude(DriverArgs, CC1Args, Path + "/c++/" + Version);
return true;
};
// Android never uses the libc++ headers installed alongside the toolchain,
// which are generally incompatible with the NDK libraries anyway.
if (!getTriple().isAndroid())
if (AddIncludePath(getDriver().Dir + "/../include"))
return;
// If this is a development, non-installed, clang, libcxx will
// not be found at ../include/c++, but it is likely to be found at
// one of the following two locations:
if (AddIncludePath(concat(SysRoot, "/usr/local/include")))
return;
if (AddIncludePath(concat(SysRoot, "/usr/include")))
return;
}
bool Generic_GCC::addLibStdCXXIncludePaths(Twine IncludeDir, StringRef Triple,
Twine IncludeSuffix,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
bool DetectDebian) const {
if (!getVFS().exists(IncludeDir))
return false;
// Debian native gcc uses g++-multiarch-incdir.diff which uses
// include/x86_64-linux-gnu/c++/10$IncludeSuffix instead of
// include/c++/10/x86_64-linux-gnu$IncludeSuffix.
std::string Dir = IncludeDir.str();
StringRef Include =
llvm::sys::path::parent_path(llvm::sys::path::parent_path(Dir));
std::string Path =
(Include + "/" + Triple + Dir.substr(Include.size()) + IncludeSuffix)
.str();
if (DetectDebian && !getVFS().exists(Path))
return false;
// GPLUSPLUS_INCLUDE_DIR
addSystemInclude(DriverArgs, CC1Args, IncludeDir);
// GPLUSPLUS_TOOL_INCLUDE_DIR. If Triple is not empty, add a target-dependent
// include directory.
if (DetectDebian)
addSystemInclude(DriverArgs, CC1Args, Path);
else if (!Triple.empty())
addSystemInclude(DriverArgs, CC1Args,
IncludeDir + "/" + Triple + IncludeSuffix);
// GPLUSPLUS_BACKWARD_INCLUDE_DIR
addSystemInclude(DriverArgs, CC1Args, IncludeDir + "/backward");
return true;
}
bool Generic_GCC::addGCCLibStdCxxIncludePaths(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
StringRef DebianMultiarch) const {
assert(GCCInstallation.isValid());
// By default, look for the C++ headers in an include directory adjacent to
// the lib directory of the GCC installation. Note that this is expected to be
// equivalent to '/usr/include/c++/X.Y' in almost all cases.
StringRef LibDir = GCCInstallation.getParentLibPath();
StringRef InstallDir = GCCInstallation.getInstallPath();
StringRef TripleStr = GCCInstallation.getTriple().str();
const Multilib &Multilib = GCCInstallation.getMultilib();
const GCCVersion &Version = GCCInstallation.getVersion();
// Try /../$triple/include/c++/$version (gcc --print-multiarch is not empty).
if (addLibStdCXXIncludePaths(
LibDir.str() + "/../" + TripleStr + "/include/c++/" + Version.Text,
TripleStr, Multilib.includeSuffix(), DriverArgs, CC1Args))
return true;
// Try /gcc/$triple/$version/include/c++/ (gcc --print-multiarch is not
// empty). Like above but for GCC built with
// --enable-version-specific-runtime-libs.
if (addLibStdCXXIncludePaths(LibDir.str() + "/gcc/" + TripleStr + "/" +
Version.Text + "/include/c++/",
TripleStr, Multilib.includeSuffix(), DriverArgs,
CC1Args))
return true;
// Detect Debian g++-multiarch-incdir.diff.
if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
DebianMultiarch, Multilib.includeSuffix(),
DriverArgs, CC1Args, /*Debian=*/true))
return true;
// Try /../include/c++/$version (gcc --print-multiarch is empty).
if (addLibStdCXXIncludePaths(LibDir.str() + "/../include/c++/" + Version.Text,
TripleStr, Multilib.includeSuffix(), DriverArgs,
CC1Args))
return true;
// Otherwise, fall back on a bunch of options which don't use multiarch
// layouts for simplicity.
const std::string LibStdCXXIncludePathCandidates[] = {
// Gentoo is weird and places its headers inside the GCC install,
// so if the first attempt to find the headers fails, try these patterns.
InstallDir.str() + "/include/g++-v" + Version.Text,
InstallDir.str() + "/include/g++-v" + Version.MajorStr + "." +
Version.MinorStr,
InstallDir.str() + "/include/g++-v" + Version.MajorStr,
};
for (const auto &IncludePath : LibStdCXXIncludePathCandidates) {
if (addLibStdCXXIncludePaths(IncludePath, TripleStr,
Multilib.includeSuffix(), DriverArgs, CC1Args))
return true;
}
return false;
}
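// Illustrative sketch (hypothetical install, assuming getParentLibPath() is
// "/usr/lib" and the installation is /usr/lib/gcc/x86_64-linux-gnu/12.2.0):
// the calls above try, in order,
//   /usr/lib/../x86_64-linux-gnu/include/c++/12.2.0
//   /usr/lib/gcc/x86_64-linux-gnu/12.2.0/include/c++/
//   /usr/lib/../include/c++/12.2.0   (Debian multiarch variant, then plain)
//   /usr/lib/gcc/x86_64-linux-gnu/12.2.0/include/g++-v12.2.0, -v12.2, -v12
// and return as soon as one of the candidates is accepted.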
void
Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args) const {
if (GCCInstallation.isValid()) {
addGCCLibStdCxxIncludePaths(DriverArgs, CC1Args,
GCCInstallation.getTriple().str());
}
}
llvm::opt::DerivedArgList *
Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef,
Action::OffloadKind DeviceOffloadKind) const {
// If this tool chain is used for an OpenMP offloading device we have to make
// sure we always generate a shared library regardless of the commands the
// user passed to the host. This is required because the runtime library must
// load the device image dynamically at run time.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs());
const OptTable &Opts = getDriver().getOpts();
// Request the shared library. Given that these options are decided
// implicitly, they do not refer to any base argument.
DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_shared));
DAL->AddFlagArg(/*BaseArg=*/nullptr, Opts.getOption(options::OPT_fPIC));
// Filter out all the arguments we don't want to pass to the offloading
// toolchain, as they can interfere with the creation of a shared library.
for (auto *A : Args) {
switch ((options::ID)A->getOption().getID()) {
default:
DAL->append(A);
break;
case options::OPT_shared:
case options::OPT_dynamic:
case options::OPT_static:
case options::OPT_fPIC:
case options::OPT_fno_PIC:
case options::OPT_fpic:
case options::OPT_fno_pic:
case options::OPT_fPIE:
case options::OPT_fno_PIE:
case options::OPT_fpie:
case options::OPT_fno_pie:
break;
}
}
return DAL;
}
return nullptr;
}
void Generic_ELF::anchor() {}
void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs,
ArgStringList &CC1Args,
Action::OffloadKind) const {
if (!DriverArgs.hasFlag(options::OPT_fuse_init_array,
options::OPT_fno_use_init_array, true))
CC1Args.push_back("-fno-use-init-array");
}
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 68158ec977cf..5d0d87fd2422 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -1,5838 +1,5836 @@
//===--------------------- SemaLookup.cpp - Name Lookup ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements name lookup for C, C++, Objective-C, and
// Objective-C++.
//
//===----------------------------------------------------------------------===//
#include "clang/AST/ASTContext.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclLookups.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Overload.h"
#include "clang/Sema/RISCVIntrinsicManager.h"
#include "clang/Sema/Scope.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/TemplateDeduction.h"
#include "clang/Sema/TypoCorrection.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/ADT/edit_distance.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <iterator>
#include <list>
#include <set>
#include <utility>
#include <vector>
#include "OpenCLBuiltins.inc"
using namespace clang;
using namespace sema;
namespace {
class UnqualUsingEntry {
const DeclContext *Nominated;
const DeclContext *CommonAncestor;
public:
UnqualUsingEntry(const DeclContext *Nominated,
const DeclContext *CommonAncestor)
: Nominated(Nominated), CommonAncestor(CommonAncestor) {
}
const DeclContext *getCommonAncestor() const {
return CommonAncestor;
}
const DeclContext *getNominatedNamespace() const {
return Nominated;
}
// Sort by the pointer value of the common ancestor.
struct Comparator {
bool operator()(const UnqualUsingEntry &L, const UnqualUsingEntry &R) {
return L.getCommonAncestor() < R.getCommonAncestor();
}
bool operator()(const UnqualUsingEntry &E, const DeclContext *DC) {
return E.getCommonAncestor() < DC;
}
bool operator()(const DeclContext *DC, const UnqualUsingEntry &E) {
return DC < E.getCommonAncestor();
}
};
};
/// A collection of using directives, as used by C++ unqualified
/// lookup.
class UnqualUsingDirectiveSet {
Sema &SemaRef;
typedef SmallVector<UnqualUsingEntry, 8> ListTy;
ListTy list;
llvm::SmallPtrSet<DeclContext*, 8> visited;
public:
UnqualUsingDirectiveSet(Sema &SemaRef) : SemaRef(SemaRef) {}
void visitScopeChain(Scope *S, Scope *InnermostFileScope) {
// C++ [namespace.udir]p1:
// During unqualified name lookup, the names appear as if they
// were declared in the nearest enclosing namespace which contains
// both the using-directive and the nominated namespace.
DeclContext *InnermostFileDC = InnermostFileScope->getEntity();
assert(InnermostFileDC && InnermostFileDC->isFileContext());
for (; S; S = S->getParent()) {
// C++ [namespace.udir]p1:
// A using-directive shall not appear in class scope, but may
// appear in namespace scope or in block scope.
DeclContext *Ctx = S->getEntity();
if (Ctx && Ctx->isFileContext()) {
visit(Ctx, Ctx);
} else if (!Ctx || Ctx->isFunctionOrMethod()) {
for (auto *I : S->using_directives())
if (SemaRef.isVisible(I))
visit(I, InnermostFileDC);
}
}
}
// Visits a context and collects all of its using directives
// recursively. Treats all using directives as if they were
// declared in the context.
//
// A given context is only ever visited once, so it is important
// that contexts be visited from the inside out in order to get
// the effective DCs right.
void visit(DeclContext *DC, DeclContext *EffectiveDC) {
if (!visited.insert(DC).second)
return;
addUsingDirectives(DC, EffectiveDC);
}
// Visits a using directive and collects all of its using
// directives recursively. Treats all using directives as if they
// were declared in the effective DC.
void visit(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) {
DeclContext *NS = UD->getNominatedNamespace();
if (!visited.insert(NS).second)
return;
addUsingDirective(UD, EffectiveDC);
addUsingDirectives(NS, EffectiveDC);
}
// Adds all the using directives in a context (and those nominated
// by its using directives, transitively) as if they appeared in
// the given effective context.
void addUsingDirectives(DeclContext *DC, DeclContext *EffectiveDC) {
SmallVector<DeclContext*, 4> queue;
while (true) {
for (auto UD : DC->using_directives()) {
DeclContext *NS = UD->getNominatedNamespace();
if (SemaRef.isVisible(UD) && visited.insert(NS).second) {
addUsingDirective(UD, EffectiveDC);
queue.push_back(NS);
}
}
if (queue.empty())
return;
DC = queue.pop_back_val();
}
}
// Add a using directive as if it had been declared in the given
// context. This helps implement C++ [namespace.udir]p3:
// The using-directive is transitive: if a scope contains a
// using-directive that nominates a second namespace that itself
// contains using-directives, the effect is as if the
// using-directives from the second namespace also appeared in
// the first.
void addUsingDirective(UsingDirectiveDecl *UD, DeclContext *EffectiveDC) {
// Find the common ancestor between the effective context and
// the nominated namespace.
DeclContext *Common = UD->getNominatedNamespace();
while (!Common->Encloses(EffectiveDC))
Common = Common->getParent();
Common = Common->getPrimaryContext();
list.push_back(UnqualUsingEntry(UD->getNominatedNamespace(), Common));
}
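// Illustrative example (hypothetical translation unit, not from this patch)
// of the transitivity handled above:
//
//   namespace A { namespace B { int x; } }
//   namespace C { using namespace A::B; }
//   void f() {
//     using namespace C;  // nominates C, and transitively A::B
//     x = 1;              // finds A::B::x; both directives are recorded with
//   }                     // the global namespace as their common ancestor
//
// getNamespacesFor() later uses that common ancestor to decide in which
// DeclContext the nominated namespaces' names should be considered.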
void done() { llvm::sort(list, UnqualUsingEntry::Comparator()); }
typedef ListTy::const_iterator const_iterator;
const_iterator begin() const { return list.begin(); }
const_iterator end() const { return list.end(); }
llvm::iterator_range<const_iterator>
getNamespacesFor(DeclContext *DC) const {
return llvm::make_range(std::equal_range(begin(), end(),
DC->getPrimaryContext(),
UnqualUsingEntry::Comparator()));
}
};
} // end anonymous namespace
// Retrieve the set of identifier namespaces that correspond to a
// specific kind of name lookup.
static inline unsigned getIDNS(Sema::LookupNameKind NameKind,
bool CPlusPlus,
bool Redeclaration) {
unsigned IDNS = 0;
switch (NameKind) {
case Sema::LookupObjCImplicitSelfParam:
case Sema::LookupOrdinaryName:
case Sema::LookupRedeclarationWithLinkage:
case Sema::LookupLocalFriendName:
case Sema::LookupDestructorName:
IDNS = Decl::IDNS_Ordinary;
if (CPlusPlus) {
IDNS |= Decl::IDNS_Tag | Decl::IDNS_Member | Decl::IDNS_Namespace;
if (Redeclaration)
IDNS |= Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend;
}
if (Redeclaration)
IDNS |= Decl::IDNS_LocalExtern;
break;
case Sema::LookupOperatorName:
// Operator lookup is its own crazy thing; it is not the same
// as (e.g.) looking up an operator name for redeclaration.
assert(!Redeclaration && "cannot do redeclaration operator lookup");
IDNS = Decl::IDNS_NonMemberOperator;
break;
case Sema::LookupTagName:
if (CPlusPlus) {
IDNS = Decl::IDNS_Type;
// When looking for a redeclaration of a tag name, we add:
// 1) TagFriend to find undeclared friend decls
// 2) Namespace because they can't "overload" with tag decls.
// 3) Tag because it includes class templates, which can't
// "overload" with tag decls.
if (Redeclaration)
IDNS |= Decl::IDNS_Tag | Decl::IDNS_TagFriend | Decl::IDNS_Namespace;
} else {
IDNS = Decl::IDNS_Tag;
}
break;
case Sema::LookupLabel:
IDNS = Decl::IDNS_Label;
break;
case Sema::LookupMemberName:
IDNS = Decl::IDNS_Member;
if (CPlusPlus)
IDNS |= Decl::IDNS_Tag | Decl::IDNS_Ordinary;
break;
case Sema::LookupNestedNameSpecifierName:
IDNS = Decl::IDNS_Type | Decl::IDNS_Namespace;
break;
case Sema::LookupNamespaceName:
IDNS = Decl::IDNS_Namespace;
break;
case Sema::LookupUsingDeclName:
assert(Redeclaration && "should only be used for redecl lookup");
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member |
Decl::IDNS_Using | Decl::IDNS_TagFriend | Decl::IDNS_OrdinaryFriend |
Decl::IDNS_LocalExtern;
break;
case Sema::LookupObjCProtocolName:
IDNS = Decl::IDNS_ObjCProtocol;
break;
case Sema::LookupOMPReductionName:
IDNS = Decl::IDNS_OMPReduction;
break;
case Sema::LookupOMPMapperName:
IDNS = Decl::IDNS_OMPMapper;
break;
case Sema::LookupAnyName:
IDNS = Decl::IDNS_Ordinary | Decl::IDNS_Tag | Decl::IDNS_Member
| Decl::IDNS_Using | Decl::IDNS_Namespace | Decl::IDNS_ObjCProtocol
| Decl::IDNS_Type;
break;
}
return IDNS;
}
void LookupResult::configure() {
IDNS = getIDNS(LookupKind, getSema().getLangOpts().CPlusPlus,
isForRedeclaration());
// If we're looking for one of the allocation or deallocation
// operators, make sure that the implicitly-declared new and delete
// operators can be found.
switch (NameInfo.getName().getCXXOverloadedOperator()) {
case OO_New:
case OO_Delete:
case OO_Array_New:
case OO_Array_Delete:
getSema().DeclareGlobalNewDelete();
break;
default:
break;
}
// Compiler builtins are always visible, regardless of where they end
// up being declared.
if (IdentifierInfo *Id = NameInfo.getName().getAsIdentifierInfo()) {
if (unsigned BuiltinID = Id->getBuiltinID()) {
if (!getSema().Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
AllowHidden = true;
}
}
}
bool LookupResult::checkDebugAssumptions() const {
// This function is never called in NDEBUG builds.
assert(ResultKind != NotFound || Decls.size() == 0);
assert(ResultKind != Found || Decls.size() == 1);
assert(ResultKind != FoundOverloaded || Decls.size() > 1 ||
(Decls.size() == 1 &&
isa<FunctionTemplateDecl>((*begin())->getUnderlyingDecl())));
assert(ResultKind != FoundUnresolvedValue || checkUnresolved());
assert(ResultKind != Ambiguous || Decls.size() > 1 ||
(Decls.size() == 1 && (Ambiguity == AmbiguousBaseSubobjects ||
Ambiguity == AmbiguousBaseSubobjectTypes)));
assert((Paths != nullptr) == (ResultKind == Ambiguous &&
(Ambiguity == AmbiguousBaseSubobjectTypes ||
Ambiguity == AmbiguousBaseSubobjects)));
return true;
}
// Necessary because CXXBasePaths is not complete in Sema.h
void LookupResult::deletePaths(CXXBasePaths *Paths) {
delete Paths;
}
/// Get a representative context for a declaration such that two declarations
/// will have the same context if they were found within the same scope.
static DeclContext *getContextForScopeMatching(Decl *D) {
// For function-local declarations, use that function as the context. This
// doesn't account for scopes within the function; the caller must deal with
// those.
DeclContext *DC = D->getLexicalDeclContext();
if (DC->isFunctionOrMethod())
return DC;
// Otherwise, look at the semantic context of the declaration. The
// declaration must have been found there.
return D->getDeclContext()->getRedeclContext();
}
/// Determine whether \p D is a better lookup result than \p Existing,
/// given that they declare the same entity.
static bool isPreferredLookupResult(Sema &S, Sema::LookupNameKind Kind,
NamedDecl *D, NamedDecl *Existing) {
// When looking up redeclarations of a using declaration, prefer a using
// shadow declaration over any other declaration of the same entity.
if (Kind == Sema::LookupUsingDeclName && isa<UsingShadowDecl>(D) &&
!isa<UsingShadowDecl>(Existing))
return true;
auto *DUnderlying = D->getUnderlyingDecl();
auto *EUnderlying = Existing->getUnderlyingDecl();
// If they have different underlying declarations, prefer a typedef over the
// original type (this happens when two type declarations denote the same
// type), per a generous reading of C++ [dcl.typedef]p3 and p4. The typedef
// might carry additional semantic information, such as an alignment override.
// However, per C++ [dcl.typedef]p5, when looking up a tag name, prefer a tag
// declaration over a typedef. Also prefer a tag over a typedef for
// destructor name lookup because in some contexts we only accept a
// class-name in a destructor declaration.
if (DUnderlying->getCanonicalDecl() != EUnderlying->getCanonicalDecl()) {
assert(isa<TypeDecl>(DUnderlying) && isa<TypeDecl>(EUnderlying));
bool HaveTag = isa<TagDecl>(EUnderlying);
bool WantTag =
Kind == Sema::LookupTagName || Kind == Sema::LookupDestructorName;
return HaveTag != WantTag;
}
// Pick the function with more default arguments.
// FIXME: In the presence of ambiguous default arguments, we should keep both,
// so we can diagnose the ambiguity if the default argument is needed.
// See C++ [over.match.best]p3.
if (auto *DFD = dyn_cast<FunctionDecl>(DUnderlying)) {
auto *EFD = cast<FunctionDecl>(EUnderlying);
unsigned DMin = DFD->getMinRequiredArguments();
unsigned EMin = EFD->getMinRequiredArguments();
// If D has more default arguments, it is preferred.
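// (A smaller minimum-required-argument count means more trailing parameters
// have default arguments.)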
if (DMin != EMin)
return DMin < EMin;
// FIXME: When we track visibility for default function arguments, check
// that we pick the declaration with more visible default arguments.
}
// Pick the template with more default template arguments.
if (auto *DTD = dyn_cast<TemplateDecl>(DUnderlying)) {
auto *ETD = cast<TemplateDecl>(EUnderlying);
unsigned DMin = DTD->getTemplateParameters()->getMinRequiredArguments();
unsigned EMin = ETD->getTemplateParameters()->getMinRequiredArguments();
// If D has more default arguments, it is preferred. Note that the set of
// default arguments (and their visibility) grows monotonically across the
// redeclaration chain, so this is a quick proxy for "is more recent".
if (DMin != EMin)
return DMin < EMin;
// If D has more *visible* default arguments, it is preferred. Note, an
// earlier default argument being visible does not imply that a later
// default argument is visible, so we can't just check the first one.
for (unsigned I = DMin, N = DTD->getTemplateParameters()->size();
I != N; ++I) {
if (!S.hasVisibleDefaultArgument(
ETD->getTemplateParameters()->getParam(I)) &&
S.hasVisibleDefaultArgument(
DTD->getTemplateParameters()->getParam(I)))
return true;
}
}
// A VarDecl can have an incomplete array type; prefer the declaration with
// the more complete array type.
if (VarDecl *DVD = dyn_cast<VarDecl>(DUnderlying)) {
VarDecl *EVD = cast<VarDecl>(EUnderlying);
if (EVD->getType()->isIncompleteType() &&
!DVD->getType()->isIncompleteType()) {
// Prefer the decl with a more complete type if visible.
return S.isVisible(DVD);
}
return false; // Avoid picking up a newer decl, just because it was newer.
}
// For most kinds of declaration, it doesn't really matter which one we pick.
if (!isa<FunctionDecl>(DUnderlying) && !isa<VarDecl>(DUnderlying)) {
// If the existing declaration is hidden, prefer the new one. Otherwise,
// keep what we've got.
return !S.isVisible(Existing);
}
// Pick the newer declaration; it might have a more precise type.
for (Decl *Prev = DUnderlying->getPreviousDecl(); Prev;
Prev = Prev->getPreviousDecl())
if (Prev == EUnderlying)
return true;
return false;
}
/// Determine whether \p D can hide a tag declaration.
static bool canHideTag(NamedDecl *D) {
// C++ [basic.scope.declarative]p4:
// Given a set of declarations in a single declarative region [...]
// exactly one declaration shall declare a class name or enumeration name
// that is not a typedef name and the other declarations shall all refer to
// the same variable, non-static data member, or enumerator, or all refer
// to functions and function templates; in this case the class name or
// enumeration name is hidden.
// C++ [basic.scope.hiding]p2:
// A class name or enumeration name can be hidden by the name of a
// variable, data member, function, or enumerator declared in the same
// scope.
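// For example, in "struct stat {}; int stat(const char *, struct stat *);",
// the function hides the struct in ordinary lookup, while an
// elaborated-type-specifier ("struct stat") still finds the tag.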
// An UnresolvedUsingValueDecl always instantiates to one of these.
D = D->getUnderlyingDecl();
return isa<VarDecl>(D) || isa<EnumConstantDecl>(D) || isa<FunctionDecl>(D) ||
isa<FunctionTemplateDecl>(D) || isa<FieldDecl>(D) ||
isa<UnresolvedUsingValueDecl>(D);
}
/// Resolves the result kind of this lookup.
void LookupResult::resolveKind() {
unsigned N = Decls.size();
// Fast case: no possible ambiguity.
if (N == 0) {
assert(ResultKind == NotFound ||
ResultKind == NotFoundInCurrentInstantiation);
return;
}
// If there's a single decl, we need to examine it to decide what
// kind of lookup this is.
if (N == 1) {
NamedDecl *D = (*Decls.begin())->getUnderlyingDecl();
if (isa<FunctionTemplateDecl>(D))
ResultKind = FoundOverloaded;
else if (isa<UnresolvedUsingValueDecl>(D))
ResultKind = FoundUnresolvedValue;
return;
}
// Don't do any extra resolution if we've already resolved as ambiguous.
if (ResultKind == Ambiguous) return;
llvm::SmallDenseMap<NamedDecl*, unsigned, 16> Unique;
llvm::SmallDenseMap<QualType, unsigned, 16> UniqueTypes;
bool Ambiguous = false;
bool HasTag = false, HasFunction = false;
bool HasFunctionTemplate = false, HasUnresolved = false;
NamedDecl *HasNonFunction = nullptr;
llvm::SmallVector<NamedDecl*, 4> EquivalentNonFunctions;
unsigned UniqueTagIndex = 0;
unsigned I = 0;
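// The loop below compacts Decls in place: a dropped entry is overwritten
// with the last live entry and N is decremented, and the element moved into
// the current slot is re-examined on the next iteration.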
while (I < N) {
NamedDecl *D = Decls[I]->getUnderlyingDecl();
D = cast<NamedDecl>(D->getCanonicalDecl());
// Ignore an invalid declaration unless it's the only one left.
if (D->isInvalidDecl() && !(I == 0 && N == 1)) {
Decls[I] = Decls[--N];
continue;
}
llvm::Optional<unsigned> ExistingI;
// Redeclarations of types via typedef can occur both within a scope
// and, through using declarations and directives, across scopes. There is
// no ambiguity if they all refer to the same type, so unique based on the
// canonical type.
if (TypeDecl *TD = dyn_cast<TypeDecl>(D)) {
QualType T = getSema().Context.getTypeDeclType(TD);
auto UniqueResult = UniqueTypes.insert(
std::make_pair(getSema().Context.getCanonicalType(T), I));
if (!UniqueResult.second) {
// The type is not unique.
ExistingI = UniqueResult.first->second;
}
}
// For non-type declarations, check for a prior lookup result naming this
// canonical declaration.
if (!ExistingI) {
auto UniqueResult = Unique.insert(std::make_pair(D, I));
if (!UniqueResult.second) {
// We've seen this entity before.
ExistingI = UniqueResult.first->second;
}
}
if (ExistingI) {
// This is not a unique lookup result. Pick one of the results and
// discard the other.
if (isPreferredLookupResult(getSema(), getLookupKind(), Decls[I],
Decls[*ExistingI]))
Decls[*ExistingI] = Decls[I];
Decls[I] = Decls[--N];
continue;
}
// Otherwise, do some decl type analysis and then continue.
if (isa<UnresolvedUsingValueDecl>(D)) {
HasUnresolved = true;
} else if (isa<TagDecl>(D)) {
if (HasTag)
Ambiguous = true;
UniqueTagIndex = I;
HasTag = true;
} else if (isa<FunctionTemplateDecl>(D)) {
HasFunction = true;
HasFunctionTemplate = true;
} else if (isa<FunctionDecl>(D)) {
HasFunction = true;
} else {
if (HasNonFunction) {
// If we're about to create an ambiguity between two declarations that
// are equivalent, but one is an internal linkage declaration from one
// module and the other is an internal linkage declaration from another
// module, just skip it.
if (getSema().isEquivalentInternalLinkageDeclaration(HasNonFunction,
D)) {
EquivalentNonFunctions.push_back(D);
Decls[I] = Decls[--N];
continue;
}
Ambiguous = true;
}
HasNonFunction = D;
}
I++;
}
// C++ [basic.scope.hiding]p2:
// A class name or enumeration name can be hidden by the name of
// an object, function, or enumerator declared in the same
// scope. If a class or enumeration name and an object, function,
// or enumerator are declared in the same scope (in any order)
// with the same name, the class or enumeration name is hidden
// wherever the object, function, or enumerator name is visible.
// But it's still an error if there are distinct tag types found,
// even if they're not visible. (ref?)
if (N > 1 && HideTags && HasTag && !Ambiguous &&
(HasFunction || HasNonFunction || HasUnresolved)) {
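// Pick some other lookup result to compare scopes against: the first decl
// if the tag is not at index 0, otherwise the last one.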
NamedDecl *OtherDecl = Decls[UniqueTagIndex ? 0 : N - 1];
if (isa<TagDecl>(Decls[UniqueTagIndex]->getUnderlyingDecl()) &&
getContextForScopeMatching(Decls[UniqueTagIndex])->Equals(
getContextForScopeMatching(OtherDecl)) &&
canHideTag(OtherDecl))
Decls[UniqueTagIndex] = Decls[--N];
else
Ambiguous = true;
}
// FIXME: This diagnostic should really be delayed until we're done with
// the lookup result, in case the ambiguity is resolved by the caller.
if (!EquivalentNonFunctions.empty() && !Ambiguous)
getSema().diagnoseEquivalentInternalLinkageDeclarations(
getNameLoc(), HasNonFunction, EquivalentNonFunctions);
Decls.truncate(N);
if (HasNonFunction && (HasFunction || HasUnresolved))
Ambiguous = true;
if (Ambiguous)
setAmbiguous(LookupResult::AmbiguousReference);
else if (HasUnresolved)
ResultKind = LookupResult::FoundUnresolvedValue;
else if (N > 1 || HasFunctionTemplate)
ResultKind = LookupResult::FoundOverloaded;
else
ResultKind = LookupResult::Found;
}
void LookupResult::addDeclsFromBasePaths(const CXXBasePaths &P) {
CXXBasePaths::const_paths_iterator I, E;
for (I = P.begin(), E = P.end(); I != E; ++I)
for (DeclContext::lookup_iterator DI = I->Decls, DE = DI.end(); DI != DE;
++DI)
addDecl(*DI);
}
void LookupResult::setAmbiguousBaseSubobjects(CXXBasePaths &P) {
Paths = new CXXBasePaths;
Paths->swap(P);
addDeclsFromBasePaths(*Paths);
resolveKind();
setAmbiguous(AmbiguousBaseSubobjects);
}
void LookupResult::setAmbiguousBaseSubobjectTypes(CXXBasePaths &P) {
Paths = new CXXBasePaths;
Paths->swap(P);
addDeclsFromBasePaths(*Paths);
resolveKind();
setAmbiguous(AmbiguousBaseSubobjectTypes);
}
void LookupResult::print(raw_ostream &Out) {
Out << Decls.size() << " result(s)";
if (isAmbiguous()) Out << ", ambiguous";
if (Paths) Out << ", base paths present";
for (iterator I = begin(), E = end(); I != E; ++I) {
Out << "\n";
(*I)->print(Out, 2);
}
}
LLVM_DUMP_METHOD void LookupResult::dump() {
llvm::errs() << "lookup results for " << getLookupName().getAsString()
<< ":\n";
for (NamedDecl *D : *this)
D->dump();
}
/// Diagnose a missing OpenCL builtin type.
static QualType diagOpenCLBuiltinTypeError(Sema &S, llvm::StringRef TypeClass,
llvm::StringRef Name) {
S.Diag(SourceLocation(), diag::err_opencl_type_not_found)
<< TypeClass << Name;
return S.Context.VoidTy;
}
/// Look up an OpenCL enum type.
static QualType getOpenCLEnumType(Sema &S, llvm::StringRef Name) {
LookupResult Result(S, &S.Context.Idents.get(Name), SourceLocation(),
Sema::LookupTagName);
S.LookupName(Result, S.TUScope);
if (Result.empty())
return diagOpenCLBuiltinTypeError(S, "enum", Name);
EnumDecl *Decl = Result.getAsSingle<EnumDecl>();
if (!Decl)
return diagOpenCLBuiltinTypeError(S, "enum", Name);
return S.Context.getEnumType(Decl);
}
/// Look up an OpenCL typedef type.
static QualType getOpenCLTypedefType(Sema &S, llvm::StringRef Name) {
LookupResult Result(S, &S.Context.Idents.get(Name), SourceLocation(),
Sema::LookupOrdinaryName);
S.LookupName(Result, S.TUScope);
if (Result.empty())
return diagOpenCLBuiltinTypeError(S, "typedef", Name);
TypedefNameDecl *Decl = Result.getAsSingle<TypedefNameDecl>();
if (!Decl)
return diagOpenCLBuiltinTypeError(S, "typedef", Name);
return S.Context.getTypedefType(Decl);
}
/// Get the QualType instances of the return type and arguments for an OpenCL
/// builtin function signature.
/// \param S (in) The Sema instance.
/// \param OpenCLBuiltin (in) The signature currently handled.
/// \param GenTypeMaxCnt (out) Maximum number of types contained in a generic
/// type used as return type or as argument.
/// Only meaningful for generic types, otherwise equals 1.
/// \param RetTypes (out) List of the possible return types.
/// \param ArgTypes (out) List of the possible argument types. For each
/// argument, ArgTypes contains QualTypes for the Cartesian product
/// of (vector sizes) x (types).
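/// For example (illustrative), a gentype argument that expands to two
/// element types in two vector widths contributes four QualTypes for that
/// argument.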
static void GetQualTypesForOpenCLBuiltin(
Sema &S, const OpenCLBuiltinStruct &OpenCLBuiltin, unsigned &GenTypeMaxCnt,
SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes) {
// Get the QualType instances of the return types.
unsigned Sig = SignatureTable[OpenCLBuiltin.SigTableIndex];
OCL2Qual(S, TypeTable[Sig], RetTypes);
GenTypeMaxCnt = RetTypes.size();
// Get the QualType instances of the arguments.
// First type is the return type, skip it.
for (unsigned Index = 1; Index < OpenCLBuiltin.NumTypes; Index++) {
SmallVector<QualType, 1> Ty;
OCL2Qual(S, TypeTable[SignatureTable[OpenCLBuiltin.SigTableIndex + Index]],
Ty);
GenTypeMaxCnt = (Ty.size() > GenTypeMaxCnt) ? Ty.size() : GenTypeMaxCnt;
ArgTypes.push_back(std::move(Ty));
}
}
/// Create a list of the candidate function overloads for an OpenCL builtin
/// function.
/// \param Context (in) The ASTContext instance.
/// \param GenTypeMaxCnt (in) Maximum number of types contained in a generic
/// type used as return type or as argument.
/// Only meaningful for generic types, otherwise equals 1.
/// \param FunctionList (out) List of FunctionTypes.
/// \param RetTypes (in) List of the possible return types.
/// \param ArgTypes (in) List of the possible types for the arguments.
static void GetOpenCLBuiltinFctOverloads(
ASTContext &Context, unsigned GenTypeMaxCnt,
std::vector<QualType> &FunctionList, SmallVector<QualType, 1> &RetTypes,
SmallVector<SmallVector<QualType, 1>, 5> &ArgTypes) {
FunctionProtoType::ExtProtoInfo PI(
Context.getDefaultCallingConvention(false, false, true));
PI.Variadic = false;
// Do not attempt to create any FunctionTypes if there are no return types,
// which happens when a type belongs to a disabled extension.
if (RetTypes.size() == 0)
return;
// Create FunctionTypes for each (gen)type.
for (unsigned IGenType = 0; IGenType < GenTypeMaxCnt; IGenType++) {
SmallVector<QualType, 5> ArgList;
for (unsigned A = 0; A < ArgTypes.size(); A++) {
// Bail out if there is an argument that has no available types.
if (ArgTypes[A].size() == 0)
return;
// Builtins such as "max" have an "sgentype" argument that represents
// the corresponding scalar type of a gentype. The number of gentypes
// must be a multiple of the number of sgentypes.
assert(GenTypeMaxCnt % ArgTypes[A].size() == 0 &&
"argument type count not compatible with gentype type count");
unsigned Idx = IGenType % ArgTypes[A].size();
ArgList.push_back(ArgTypes[A][Idx]);
}
FunctionList.push_back(Context.getFunctionType(
RetTypes[(RetTypes.size() != 1) ? IGenType : 0], ArgList, PI));
}
}
/// When trying to resolve a function name, if isOpenCLBuiltin() returns a
/// non-null <Index, Len> pair, then the name is referencing an OpenCL
/// builtin function. Add all candidate signatures to the LookupResult.
///
/// \param S (in) The Sema instance.
/// \param LR (inout) The LookupResult instance.
/// \param II (in) The identifier being resolved.
/// \param FctIndex (in) Starting index in the BuiltinTable.
/// \param Len (in) The signature list has Len elements.
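/// For example (illustrative), with FctIndex = 10 and Len = 3, the candidate
/// signatures come from BuiltinTable[10], [11] and [12].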
static void InsertOCLBuiltinDeclarationsFromTable(Sema &S, LookupResult &LR,
IdentifierInfo *II,
const unsigned FctIndex,
const unsigned Len) {
// Whether the builtin function declaration uses generic types (gentype).
bool HasGenType = false;
// Maximum number of types contained in a generic type used as return type or
// as argument. Only meaningful for generic types, otherwise equals 1.
unsigned GenTypeMaxCnt;
ASTContext &Context = S.Context;
for (unsigned SignatureIndex = 0; SignatureIndex < Len; SignatureIndex++) {
const OpenCLBuiltinStruct &OpenCLBuiltin =
BuiltinTable[FctIndex + SignatureIndex];
// Ignore this builtin function if it is not available in the currently
// selected language version.
if (!isOpenCLVersionContainedInMask(Context.getLangOpts(),
OpenCLBuiltin.Versions))
continue;
// Ignore this builtin function if it carries an extension macro that is
// not defined. This indicates that the extension is not supported by the
// target, so the builtin function should not be available.
StringRef Extensions = FunctionExtensionTable[OpenCLBuiltin.Extension];
if (!Extensions.empty()) {
SmallVector<StringRef, 2> ExtVec;
Extensions.split(ExtVec, " ");
bool AllExtensionsDefined = true;
for (StringRef Ext : ExtVec) {
if (!S.getPreprocessor().isMacroDefined(Ext)) {
AllExtensionsDefined = false;
break;
}
}
if (!AllExtensionsDefined)
continue;
}
SmallVector<QualType, 1> RetTypes;
SmallVector<SmallVector<QualType, 1>, 5> ArgTypes;
// Obtain QualType lists for the function signature.
GetQualTypesForOpenCLBuiltin(S, OpenCLBuiltin, GenTypeMaxCnt, RetTypes,
ArgTypes);
if (GenTypeMaxCnt > 1) {
HasGenType = true;
}
// Create function overload for each type combination.
std::vector<QualType> FunctionList;
GetOpenCLBuiltinFctOverloads(Context, GenTypeMaxCnt, FunctionList, RetTypes,
ArgTypes);
SourceLocation Loc = LR.getNameLoc();
DeclContext *Parent = Context.getTranslationUnitDecl();
FunctionDecl *NewOpenCLBuiltin;
for (const auto &FTy : FunctionList) {
NewOpenCLBuiltin = FunctionDecl::Create(
Context, Parent, Loc, Loc, II, FTy, /*TInfo=*/nullptr, SC_Extern,
S.getCurFPFeatures().isFPConstrained(), false,
FTy->isFunctionProtoType());
NewOpenCLBuiltin->setImplicit();
// Create Decl objects for each parameter, adding them to the
// FunctionDecl.
const auto *FP = cast<FunctionProtoType>(FTy);
SmallVector<ParmVarDecl *, 4> ParmList;
for (unsigned IParm = 0, e = FP->getNumParams(); IParm != e; ++IParm) {
ParmVarDecl *Parm = ParmVarDecl::Create(
Context, NewOpenCLBuiltin, SourceLocation(), SourceLocation(),
nullptr, FP->getParamType(IParm), nullptr, SC_None, nullptr);
Parm->setScopeInfo(0, IParm);
ParmList.push_back(Parm);
}
NewOpenCLBuiltin->setParams(ParmList);
// Add function attributes.
if (OpenCLBuiltin.IsPure)
NewOpenCLBuiltin->addAttr(PureAttr::CreateImplicit(Context));
if (OpenCLBuiltin.IsConst)
NewOpenCLBuiltin->addAttr(ConstAttr::CreateImplicit(Context));
if (OpenCLBuiltin.IsConv)
NewOpenCLBuiltin->addAttr(ConvergentAttr::CreateImplicit(Context));
if (!S.getLangOpts().OpenCLCPlusPlus)
NewOpenCLBuiltin->addAttr(OverloadableAttr::CreateImplicit(Context));
LR.addDecl(NewOpenCLBuiltin);
}
}
// If we added overloads, we need to resolve the lookup result.
if (Len > 1 || HasGenType)
LR.resolveKind();
}
/// Look up a builtin function when name lookup would otherwise
/// fail.
bool Sema::LookupBuiltin(LookupResult &R) {
Sema::LookupNameKind NameKind = R.getLookupKind();
// If we didn't find a use of this identifier, and if the identifier
// corresponds to a compiler builtin, create the decl object for the builtin
// now, injecting it into translation unit scope, and return it.
if (NameKind == Sema::LookupOrdinaryName ||
NameKind == Sema::LookupRedeclarationWithLinkage) {
IdentifierInfo *II = R.getLookupName().getAsIdentifierInfo();
if (II) {
if (getLangOpts().CPlusPlus && NameKind == Sema::LookupOrdinaryName) {
if (II == getASTContext().getMakeIntegerSeqName()) {
R.addDecl(getASTContext().getMakeIntegerSeqDecl());
return true;
} else if (II == getASTContext().getTypePackElementName()) {
R.addDecl(getASTContext().getTypePackElementDecl());
return true;
}
}
// Check if this is an OpenCL Builtin, and if so, insert its overloads.
if (getLangOpts().OpenCL && getLangOpts().DeclareOpenCLBuiltins) {
auto Index = isOpenCLBuiltin(II->getName());
if (Index.first) {
InsertOCLBuiltinDeclarationsFromTable(*this, R, II, Index.first - 1,
Index.second);
return true;
}
}
if (DeclareRISCVVBuiltins) {
if (!RVIntrinsicManager)
RVIntrinsicManager = CreateRISCVIntrinsicManager(*this);
if (RVIntrinsicManager->CreateIntrinsicIfFound(R, II, PP))
return true;
}
// If this is a builtin on this (or all) targets, create the decl.
if (unsigned BuiltinID = II->getBuiltinID()) {
- // In C++, C2x, and OpenCL (spec v1.2 s6.9.f), we don't have any
- // predefined library functions like 'malloc'. Instead, we'll just
- // error.
- if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL ||
- getLangOpts().C2x) &&
+ // In C++ and OpenCL (spec v1.2 s6.9.f), we don't have any predefined
+ // library functions like 'malloc'. Instead, we'll just error.
+ if ((getLangOpts().CPlusPlus || getLangOpts().OpenCL) &&
Context.BuiltinInfo.isPredefinedLibFunction(BuiltinID))
return false;
if (NamedDecl *D =
LazilyCreateBuiltin(II, BuiltinID, TUScope,
R.isForRedeclaration(), R.getNameLoc())) {
R.addDecl(D);
return true;
}
}
}
}
return false;
}
/// Looks up the declaration of "struct objc_super" and
/// saves it for later use in building the builtin declarations of
/// objc_msgSendSuper and objc_msgSendSuper_stret.
static void LookupPredefedObjCSuperType(Sema &Sema, Scope *S) {
ASTContext &Context = Sema.Context;
LookupResult Result(Sema, &Context.Idents.get("objc_super"), SourceLocation(),
Sema::LookupTagName);
Sema.LookupName(Result, S);
if (Result.getResultKind() == LookupResult::Found)
if (const TagDecl *TD = Result.getAsSingle<TagDecl>())
Context.setObjCSuperType(Context.getTagDeclType(TD));
}
void Sema::LookupNecessaryTypesForBuiltin(Scope *S, unsigned ID) {
if (ID == Builtin::BIobjc_msgSendSuper)
LookupPredefedObjCSuperType(*this, S);
}
/// Determine whether we can declare a special member function within
/// the class at this point.
static bool CanDeclareSpecialMemberFunction(const CXXRecordDecl *Class) {
// We need to have a definition for the class.
if (!Class->getDefinition() || Class->isDependentContext())
return false;
// We can't be in the middle of defining the class.
return !Class->isBeingDefined();
}
void Sema::ForceDeclarationOfImplicitMembers(CXXRecordDecl *Class) {
if (!CanDeclareSpecialMemberFunction(Class))
return;
// If the default constructor has not yet been declared, do so now.
if (Class->needsImplicitDefaultConstructor())
DeclareImplicitDefaultConstructor(Class);
// If the copy constructor has not yet been declared, do so now.
if (Class->needsImplicitCopyConstructor())
DeclareImplicitCopyConstructor(Class);
// If the copy assignment operator has not yet been declared, do so now.
if (Class->needsImplicitCopyAssignment())
DeclareImplicitCopyAssignment(Class);
if (getLangOpts().CPlusPlus11) {
// If the move constructor has not yet been declared, do so now.
if (Class->needsImplicitMoveConstructor())
DeclareImplicitMoveConstructor(Class);
// If the move assignment operator has not yet been declared, do so now.
if (Class->needsImplicitMoveAssignment())
DeclareImplicitMoveAssignment(Class);
}
// If the destructor has not yet been declared, do so now.
if (Class->needsImplicitDestructor())
DeclareImplicitDestructor(Class);
}
/// Determine whether this is the name of an implicitly-declared
/// special member function.
static bool isImplicitlyDeclaredMemberFunctionName(DeclarationName Name) {
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
case DeclarationName::CXXDestructorName:
return true;
case DeclarationName::CXXOperatorName:
return Name.getCXXOverloadedOperator() == OO_Equal;
default:
break;
}
return false;
}
/// If there are any implicit member functions with the given name
/// that need to be declared in the given declaration context, do so.
static void DeclareImplicitMemberFunctionsWithName(Sema &S,
DeclarationName Name,
SourceLocation Loc,
const DeclContext *DC) {
if (!DC)
return;
switch (Name.getNameKind()) {
case DeclarationName::CXXConstructorName:
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC))
if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
CXXRecordDecl *Class = const_cast<CXXRecordDecl *>(Record);
if (Record->needsImplicitDefaultConstructor())
S.DeclareImplicitDefaultConstructor(Class);
if (Record->needsImplicitCopyConstructor())
S.DeclareImplicitCopyConstructor(Class);
if (S.getLangOpts().CPlusPlus11 &&
Record->needsImplicitMoveConstructor())
S.DeclareImplicitMoveConstructor(Class);
}
break;
case DeclarationName::CXXDestructorName:
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC))
if (Record->getDefinition() && Record->needsImplicitDestructor() &&
CanDeclareSpecialMemberFunction(Record))
S.DeclareImplicitDestructor(const_cast<CXXRecordDecl *>(Record));
break;
case DeclarationName::CXXOperatorName:
if (Name.getCXXOverloadedOperator() != OO_Equal)
break;
if (const CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(DC)) {
if (Record->getDefinition() && CanDeclareSpecialMemberFunction(Record)) {
CXXRecordDecl *Class = const_cast<CXXRecordDecl *>(Record);
if (Record->needsImplicitCopyAssignment())
S.DeclareImplicitCopyAssignment(Class);
if (S.getLangOpts().CPlusPlus11 &&
Record->needsImplicitMoveAssignment())
S.DeclareImplicitMoveAssignment(Class);
}
}
break;
case DeclarationName::CXXDeductionGuideName:
S.DeclareImplicitDeductionGuides(Name.getCXXDeductionGuideTemplate(), Loc);
break;
default:
break;
}
}
// Adds all qualifying matches for a name within a decl context to the
// given lookup result. Returns true if any matches were found.
static bool LookupDirect(Sema &S, LookupResult &R, const DeclContext *DC) {
bool Found = false;
// Lazily declare C++ special member functions.
if (S.getLangOpts().CPlusPlus)
DeclareImplicitMemberFunctionsWithName(S, R.getLookupName(), R.getNameLoc(),
DC);
// Perform lookup into this declaration context.
DeclContext::lookup_result DR = DC->lookup(R.getLookupName());
for (NamedDecl *D : DR) {
if ((D = R.getAcceptableDecl(D))) {
R.addDecl(D);
Found = true;
}
}
if (!Found && DC->isTranslationUnit() && S.LookupBuiltin(R))
return true;
if (R.getLookupName().getNameKind()
!= DeclarationName::CXXConversionFunctionName ||
R.getLookupName().getCXXNameType()->isDependentType() ||
!isa<CXXRecordDecl>(DC))
return Found;
// C++ [temp.mem]p6:
// A specialization of a conversion function template is not found by
// name lookup. Instead, any conversion function templates visible in the
// context of the use are considered. [...]
const CXXRecordDecl *Record = cast<CXXRecordDecl>(DC);
if (!Record->isCompleteDefinition())
return Found;
// For conversion operators, 'operator auto' should only match
// 'operator auto'. Since 'auto' is not a type, it shouldn't be considered
// as a candidate for template substitution.
auto *ContainedDeducedType =
R.getLookupName().getCXXNameType()->getContainedDeducedType();
if (R.getLookupName().getNameKind() ==
DeclarationName::CXXConversionFunctionName &&
ContainedDeducedType && ContainedDeducedType->isUndeducedType())
return Found;
for (CXXRecordDecl::conversion_iterator U = Record->conversion_begin(),
UEnd = Record->conversion_end(); U != UEnd; ++U) {
FunctionTemplateDecl *ConvTemplate = dyn_cast<FunctionTemplateDecl>(*U);
if (!ConvTemplate)
continue;
// When we're performing lookup for the purposes of redeclaration, just
// add the conversion function template. When we deduce template
// arguments for specializations, we'll end up unifying the return
// type of the new declaration with the type of the function template.
if (R.isForRedeclaration()) {
R.addDecl(ConvTemplate);
Found = true;
continue;
}
// C++ [temp.mem]p6:
// [...] For each such operator, if argument deduction succeeds
// (14.9.2.3), the resulting specialization is used as if found by
// name lookup.
//
// When referencing a conversion function for any purpose other than
// a redeclaration (such that we'll be building an expression with the
// result), perform template argument deduction and place the
// specialization into the result set. We do this to avoid forcing all
// callers to perform special deduction for conversion functions.
TemplateDeductionInfo Info(R.getNameLoc());
FunctionDecl *Specialization = nullptr;
const FunctionProtoType *ConvProto
= ConvTemplate->getTemplatedDecl()->getType()->getAs<FunctionProtoType>();
assert(ConvProto && "Nonsensical conversion function template type");
// Compute the type of the function that we would expect the conversion
// function to have, if it were to match the name given.
// FIXME: Calling convention!
FunctionProtoType::ExtProtoInfo EPI = ConvProto->getExtProtoInfo();
EPI.ExtInfo = EPI.ExtInfo.withCallingConv(CC_C);
EPI.ExceptionSpec = EST_None;
QualType ExpectedType
= R.getSema().Context.getFunctionType(R.getLookupName().getCXXNameType(),
None, EPI);
// Perform template argument deduction against the type that we would
// expect the function to have.
if (R.getSema().DeduceTemplateArguments(ConvTemplate, nullptr, ExpectedType,
Specialization, Info)
== Sema::TDK_Success) {
R.addDecl(Specialization);
Found = true;
}
}
return Found;
}
// Performs C++ unqualified lookup into the given file context.
static bool
CppNamespaceLookup(Sema &S, LookupResult &R, ASTContext &Context,
DeclContext *NS, UnqualUsingDirectiveSet &UDirs) {
assert(NS && NS->isFileContext() && "CppNamespaceLookup() requires namespace!");
// Perform direct name lookup into the LookupCtx.
bool Found = LookupDirect(S, R, NS);
// Perform direct name lookup into the namespaces nominated by the
// using directives whose common ancestor is this namespace.
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(NS))
if (LookupDirect(S, R, UUE.getNominatedNamespace()))
Found = true;
R.resolveKind();
return Found;
}
static bool isNamespaceOrTranslationUnitScope(Scope *S) {
if (DeclContext *Ctx = S->getEntity())
return Ctx->isFileContext();
return false;
}
/// Find the outer declaration context from this scope. This indicates the
/// context that we should search up to (exclusive) before considering the
/// parent of the specified scope.
static DeclContext *findOuterContext(Scope *S) {
for (Scope *OuterS = S->getParent(); OuterS; OuterS = OuterS->getParent())
if (DeclContext *DC = OuterS->getLookupEntity())
return DC;
return nullptr;
}
namespace {
/// An RAII object to specify that we want to find block scope extern
/// declarations.
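/// For example, in "void f() { extern int n; }" the declaration of 'n' is
/// placed in the IDNS_LocalExtern identifier namespace rather than
/// IDNS_Ordinary, so it is only found when a lookup opts in, as this RAII
/// object does for block-scope lookups.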
struct FindLocalExternScope {
FindLocalExternScope(LookupResult &R)
: R(R), OldFindLocalExtern(R.getIdentifierNamespace() &
Decl::IDNS_LocalExtern) {
R.setFindLocalExtern(R.getIdentifierNamespace() &
(Decl::IDNS_Ordinary | Decl::IDNS_NonMemberOperator));
}
void restore() {
R.setFindLocalExtern(OldFindLocalExtern);
}
~FindLocalExternScope() {
restore();
}
LookupResult &R;
bool OldFindLocalExtern;
};
} // end anonymous namespace
bool Sema::CppLookupName(LookupResult &R, Scope *S) {
assert(getLangOpts().CPlusPlus && "Can perform only C++ lookup");
DeclarationName Name = R.getLookupName();
Sema::LookupNameKind NameKind = R.getLookupKind();
// If this is the name of an implicitly-declared special member function,
// go through the scope stack and implicitly declare such members where needed.
if (isImplicitlyDeclaredMemberFunctionName(Name)) {
for (Scope *PreS = S; PreS; PreS = PreS->getParent())
if (DeclContext *DC = PreS->getEntity())
DeclareImplicitMemberFunctionsWithName(*this, Name, R.getNameLoc(), DC);
}
// Implicitly declare member functions with the name we're looking for, if in
// fact we are in a scope where it matters.
Scope *Initial = S;
IdentifierResolver::iterator
I = IdResolver.begin(Name),
IEnd = IdResolver.end();
// First we look up the local scope.
// We don't consider using-directives, as per 7.3.4.p1 [namespace.udir]
// ...During unqualified name lookup (3.4.1), the names appear as if
// they were declared in the nearest enclosing namespace which contains
// both the using-directive and the nominated namespace.
// [Note: in this context, "contains" means "contains directly or
// indirectly".
//
// For example:
// namespace A { int i; }
// void foo() {
// int i;
// {
// using namespace A;
// ++i; // finds local 'i', A::i appears at global scope
// }
// }
//
UnqualUsingDirectiveSet UDirs(*this);
bool VisitedUsingDirectives = false;
bool LeftStartingScope = false;
// When performing a scope lookup, we want to find local extern decls.
FindLocalExternScope FindLocals(R);
for (; S && !isNamespaceOrTranslationUnitScope(S); S = S->getParent()) {
bool SearchNamespaceScope = true;
// Check whether the IdResolver has anything in this scope.
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
if (NameKind == LookupRedeclarationWithLinkage &&
!(*I)->isTemplateParameter()) {
// If it's a template parameter, we still find it, so we can diagnose
// the invalid redeclaration.
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !Initial->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
// does not have linkage, skip it.
if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
} else {
// We found something in this scope, so we should not look at the
// namespace scope.
SearchNamespaceScope = false;
}
R.addDecl(ND);
}
}
if (!SearchNamespaceScope) {
R.resolveKind();
if (S->isClassScope())
if (CXXRecordDecl *Record =
dyn_cast_or_null<CXXRecordDecl>(S->getEntity()))
R.setNamingClass(Record);
return true;
}
if (NameKind == LookupLocalFriendName && !S->isClassScope()) {
// C++11 [class.friend]p11:
// If a friend declaration appears in a local class and the name
// specified is an unqualified name, a prior declaration is
// looked up without considering scopes that are outside the
// innermost enclosing non-class scope.
return false;
}
if (DeclContext *Ctx = S->getLookupEntity()) {
DeclContext *OuterCtx = findOuterContext(S);
for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
// We do not directly look into transparent contexts, since
// those entities will be found in the nearest enclosing
// non-transparent context.
if (Ctx->isTransparentContext())
continue;
// We do not look directly into function or method contexts,
// since all of the local variables and parameters of the
// function/method are present within the Scope.
if (Ctx->isFunctionOrMethod()) {
// If we have an Objective-C instance method, look for ivars
// in the corresponding interface.
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(Ctx)) {
if (Method->isInstanceMethod() && Name.getAsIdentifierInfo())
if (ObjCInterfaceDecl *Class = Method->getClassInterface()) {
ObjCInterfaceDecl *ClassDeclared;
if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(
Name.getAsIdentifierInfo(),
ClassDeclared)) {
if (NamedDecl *ND = R.getAcceptableDecl(Ivar)) {
R.addDecl(ND);
R.resolveKind();
return true;
}
}
}
}
continue;
}
// If this is a file context, we need to perform unqualified name
// lookup considering using directives.
if (Ctx->isFileContext()) {
// If we haven't handled using directives yet, do so now.
if (!VisitedUsingDirectives) {
// Add using directives from this context up to the top level.
for (DeclContext *UCtx = Ctx; UCtx; UCtx = UCtx->getParent()) {
if (UCtx->isTransparentContext())
continue;
UDirs.visit(UCtx, UCtx);
}
// Find the innermost file scope, so we can add using directives
// from local scopes.
Scope *InnermostFileScope = S;
while (InnermostFileScope &&
!isNamespaceOrTranslationUnitScope(InnermostFileScope))
InnermostFileScope = InnermostFileScope->getParent();
UDirs.visitScopeChain(Initial, InnermostFileScope);
UDirs.done();
VisitedUsingDirectives = true;
}
if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs)) {
R.resolveKind();
return true;
}
continue;
}
// Perform qualified name lookup into this context.
// FIXME: In some cases, we know that every name that could be found by
// this qualified name lookup will also be on the identifier chain. For
// example, inside a class without any base classes, we never need to
// perform qualified lookup because all of the members are on top of the
// identifier chain.
if (LookupQualifiedName(R, Ctx, /*InUnqualifiedLookup=*/true))
return true;
}
}
}
// Stop if we ran out of scopes.
// FIXME: This really, really shouldn't be happening.
if (!S) return false;
// If we are looking for members, no need to look into global/namespace scope.
if (NameKind == LookupMemberName)
return false;
// Collect UsingDirectiveDecls in all scopes and, recursively, all
// namespaces nominated by those using-directives.
//
// FIXME: Cache this sorted list in Scope structure, and DeclContext, so we
// don't build it for each lookup!
if (!VisitedUsingDirectives) {
UDirs.visitScopeChain(Initial, S);
UDirs.done();
}
// If we're not performing redeclaration lookup, do not look for local
// extern declarations outside of a function scope.
if (!R.isForRedeclaration())
FindLocals.restore();
// Lookup namespace scope, and global scope.
// Unqualified name lookup in C++ requires looking into scopes
// that aren't strictly lexical, and therefore we walk through the
// context as well as walking through the scopes.
for (; S; S = S->getParent()) {
// Check whether the IdResolver has anything in this scope.
bool Found = false;
for (; I != IEnd && S->isDeclScope(*I); ++I) {
if (NamedDecl *ND = R.getAcceptableDecl(*I)) {
// We found something. Look for anything else in our scope
// with this same name and in an acceptable identifier
// namespace, so that we can construct an overload set if we
// need to.
Found = true;
R.addDecl(ND);
}
}
if (Found && S->isTemplateParamScope()) {
R.resolveKind();
return true;
}
DeclContext *Ctx = S->getLookupEntity();
if (Ctx) {
DeclContext *OuterCtx = findOuterContext(S);
for (; Ctx && !Ctx->Equals(OuterCtx); Ctx = Ctx->getLookupParent()) {
// We do not directly look into transparent contexts, since
// those entities will be found in the nearest enclosing
// non-transparent context.
if (Ctx->isTransparentContext())
continue;
// If we have a context, and it's not a context stashed in the
// template parameter scope for an out-of-line definition, also
// look into that context.
if (!(Found && S->isTemplateParamScope())) {
assert(Ctx->isFileContext() &&
"We should have been looking only at file context here already.");
// Look into context considering using-directives.
if (CppNamespaceLookup(*this, R, Context, Ctx, UDirs))
Found = true;
}
if (Found) {
R.resolveKind();
return true;
}
if (R.isForRedeclaration() && !Ctx->isTransparentContext())
return false;
}
}
if (R.isForRedeclaration() && Ctx && !Ctx->isTransparentContext())
return false;
}
return !R.empty();
}
void Sema::makeMergedDefinitionVisible(NamedDecl *ND) {
if (auto *M = getCurrentModule())
Context.mergeDefinitionIntoModule(ND, M);
else
// We're not building a module; just make the definition visible.
ND->setVisibleDespiteOwningModule();
// If ND is a template declaration, make the template parameters
// visible too. They're not (necessarily) within a mergeable DeclContext.
if (auto *TD = dyn_cast<TemplateDecl>(ND))
for (auto *Param : *TD->getTemplateParameters())
makeMergedDefinitionVisible(Param);
}
/// Find the module in which the given declaration was defined.
static Module *getDefiningModule(Sema &S, Decl *Entity) {
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Entity)) {
// If this function was instantiated from a template, the defining module is
// the module containing the pattern.
if (FunctionDecl *Pattern = FD->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (CXXRecordDecl *RD = dyn_cast<CXXRecordDecl>(Entity)) {
if (CXXRecordDecl *Pattern = RD->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (EnumDecl *ED = dyn_cast<EnumDecl>(Entity)) {
if (auto *Pattern = ED->getTemplateInstantiationPattern())
Entity = Pattern;
} else if (VarDecl *VD = dyn_cast<VarDecl>(Entity)) {
if (VarDecl *Pattern = VD->getTemplateInstantiationPattern())
Entity = Pattern;
}
// Walk up to the containing context. That might also have been instantiated
// from a template.
DeclContext *Context = Entity->getLexicalDeclContext();
if (Context->isFileContext())
return S.getOwningModule(Entity);
return getDefiningModule(S, cast<Decl>(Context));
}
llvm::DenseSet<Module*> &Sema::getLookupModules() {
unsigned N = CodeSynthesisContexts.size();
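// Lazily extend the per-context list: record the defining module for each
// code synthesis context not yet processed, using nullptr when that module
// is already in the cache.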
for (unsigned I = CodeSynthesisContextLookupModules.size();
I != N; ++I) {
Module *M = CodeSynthesisContexts[I].Entity ?
getDefiningModule(*this, CodeSynthesisContexts[I].Entity) :
nullptr;
if (M && !LookupModulesCache.insert(M).second)
M = nullptr;
CodeSynthesisContextLookupModules.push_back(M);
}
return LookupModulesCache;
}
/// Determine whether we can use all the declarations in the module.
bool Sema::isUsableModule(const Module *M) {
assert(M && "We shouldn't check nullness for module here");
// Return quickly if we cached the result.
if (UsableModuleUnitsCache.count(M))
return true;
// If M is the global module fragment of the current translation unit, it
// should be usable.
// [module.global.frag]p1:
// The global module fragment can be used to provide declarations that are
// attached to the global module and usable within the module unit.
if (M == GlobalModuleFragment ||
// If M is the module we're parsing, it should be usable. This covers the
// private module fragment: the private module fragment is usable only
// within the current module unit, and by the grammar it can only appear in
// the module unit currently being parsed. NOTE: This case is also covered
// by the following condition; checking it first avoids the string
// comparison below where possible.
M == getCurrentModule() ||
// The module unit which is in the same module with the current module
// unit is usable.
//
// FIXME: Here we judge whether they are in the same module by comparing
// strings. Is there a better solution?
M->getPrimaryModuleInterfaceName() ==
llvm::StringRef(getLangOpts().CurrentModule).split(':').first) {
UsableModuleUnitsCache.insert(M);
return true;
}
return false;
}
bool Sema::hasVisibleMergedDefinition(NamedDecl *Def) {
for (const Module *Merged : Context.getModulesWithMergedDefinition(Def))
if (isModuleVisible(Merged))
return true;
return false;
}
bool Sema::hasMergedDefinitionInCurrentModule(NamedDecl *Def) {
for (const Module *Merged : Context.getModulesWithMergedDefinition(Def))
if (isUsableModule(Merged))
return true;
return false;
}
template <typename ParmDecl>
static bool
hasAcceptableDefaultArgument(Sema &S, const ParmDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
if (!D->hasDefaultArgument())
return false;
llvm::SmallDenseSet<const ParmDecl *, 4> Visited;
while (D && !Visited.count(D)) {
Visited.insert(D);
auto &DefaultArg = D->getDefaultArgStorage();
if (!DefaultArg.isInherited() && S.isAcceptable(D, Kind))
return true;
if (!DefaultArg.isInherited() && Modules) {
auto *NonConstD = const_cast<ParmDecl*>(D);
Modules->push_back(S.getOwningModule(NonConstD));
}
// If there was a previous default argument, maybe its parameter is
// acceptable.
D = DefaultArg.getInheritedFrom();
}
return false;
}
bool Sema::hasAcceptableDefaultArgument(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
if (auto *P = dyn_cast<TemplateTypeParmDecl>(D))
return ::hasAcceptableDefaultArgument(*this, P, Modules, Kind);
if (auto *P = dyn_cast<NonTypeTemplateParmDecl>(D))
return ::hasAcceptableDefaultArgument(*this, P, Modules, Kind);
return ::hasAcceptableDefaultArgument(
*this, cast<TemplateTemplateParmDecl>(D), Modules, Kind);
}
bool Sema::hasVisibleDefaultArgument(const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableDefaultArgument(D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableDefaultArgument(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableDefaultArgument(D, Modules,
Sema::AcceptableKind::Reachable);
}
template <typename Filter>
static bool
hasAcceptableDeclarationImpl(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules, Filter F,
Sema::AcceptableKind Kind) {
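// Walk D's redeclaration chain, considering only redeclarations accepted by
// the filter F. Return true if any of them is acceptable; if every
// redeclaration is filtered out, the answer is vacuously true.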
bool HasFilteredRedecls = false;
for (auto *Redecl : D->redecls()) {
auto *R = cast<NamedDecl>(Redecl);
if (!F(R))
continue;
if (S.isAcceptable(R, Kind))
return true;
HasFilteredRedecls = true;
if (Modules)
Modules->push_back(R->getOwningModule());
}
// Only return false if there is at least one redecl that is not filtered out.
if (HasFilteredRedecls)
return false;
return true;
}
static bool
hasAcceptableExplicitSpecialization(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
return hasAcceptableDeclarationImpl(
S, D, Modules,
[](const NamedDecl *D) {
if (auto *RD = dyn_cast<CXXRecordDecl>(D))
return RD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
if (auto *FD = dyn_cast<FunctionDecl>(D))
return FD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
if (auto *VD = dyn_cast<VarDecl>(D))
return VD->getTemplateSpecializationKind() ==
TSK_ExplicitSpecialization;
llvm_unreachable("unknown explicit specialization kind");
},
Kind);
}
bool Sema::hasVisibleExplicitSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return ::hasAcceptableExplicitSpecialization(*this, D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableExplicitSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return ::hasAcceptableExplicitSpecialization(*this, D, Modules,
Sema::AcceptableKind::Reachable);
}
static bool
hasAcceptableMemberSpecialization(Sema &S, const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules,
Sema::AcceptableKind Kind) {
assert(isa<CXXRecordDecl>(D->getDeclContext()) &&
"not a member specialization");
return hasAcceptableDeclarationImpl(
S, D, Modules,
[](const NamedDecl *D) {
// If the specialization is declared at namespace scope, then it's a
// member specialization declaration. If it's lexically inside the class
// definition then it was instantiated.
//
// FIXME: This is a hack. There should be a better way to determine
// this.
// FIXME: What about MS-style explicit specializations declared within a
// class definition?
return D->getLexicalDeclContext()->isFileContext();
},
Kind);
}
bool Sema::hasVisibleMemberSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableMemberSpecialization(*this, D, Modules,
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableMemberSpecialization(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
return hasAcceptableMemberSpecialization(*this, D, Modules,
Sema::AcceptableKind::Reachable);
}
/// Determine whether a declaration is acceptable to name lookup.
///
/// This routine determines whether the declaration D is acceptable in the
/// current lookup context, taking into account the current template
/// instantiation stack. During template instantiation, a declaration is
/// acceptable if it is acceptable from a module containing any entity on the
/// template instantiation path (by instantiating a template, you allow it to
/// see the declarations that your module can see, including those later on in
/// your module).
bool LookupResult::isAcceptableSlow(Sema &SemaRef, NamedDecl *D,
Sema::AcceptableKind Kind) {
assert(!D->isUnconditionallyVisible() &&
"should not call this: not in slow case");
Module *DeclModule = SemaRef.getOwningModule(D);
assert(DeclModule && "hidden decl has no owning module");
// If the owning module is visible, the decl is acceptable.
if (SemaRef.isModuleVisible(DeclModule,
D->isInvisibleOutsideTheOwningModule()))
return true;
// Determine whether a decl context is a file context for the purpose of
// visibility/reachability. This looks through some (export and linkage spec)
// transparent contexts, but not others (enums).
auto IsEffectivelyFileContext = [](const DeclContext *DC) {
return DC->isFileContext() || isa<LinkageSpecDecl>(DC) ||
isa<ExportDecl>(DC);
};
// If this declaration is not at namespace scope
// then it is acceptable if its lexical parent has an acceptable definition.
DeclContext *DC = D->getLexicalDeclContext();
if (DC && !IsEffectivelyFileContext(DC)) {
// For a parameter, check whether our current template declaration's
// lexical context is acceptable, not whether there's some other acceptable
// definition of it, because parameters aren't "within" the definition.
//
// In C++ we need to check for an acceptable definition due to ODR merging,
// and in C we must not because each declaration of a function gets its own
// set of declarations for tags in prototype scope.
bool AcceptableWithinParent;
if (D->isTemplateParameter()) {
bool SearchDefinitions = true;
if (const auto *DCD = dyn_cast<Decl>(DC)) {
if (const auto *TD = DCD->getDescribedTemplate()) {
TemplateParameterList *TPL = TD->getTemplateParameters();
auto Index = getDepthAndIndex(D).second;
SearchDefinitions = Index >= TPL->size() || TPL->getParam(Index) != D;
}
}
if (SearchDefinitions)
AcceptableWithinParent =
SemaRef.hasAcceptableDefinition(cast<NamedDecl>(DC), Kind);
else
AcceptableWithinParent =
isAcceptable(SemaRef, cast<NamedDecl>(DC), Kind);
} else if (isa<ParmVarDecl>(D) ||
(isa<FunctionDecl>(DC) && !SemaRef.getLangOpts().CPlusPlus))
AcceptableWithinParent = isAcceptable(SemaRef, cast<NamedDecl>(DC), Kind);
else if (D->isModulePrivate()) {
// A module-private declaration is only acceptable if an enclosing lexical
// parent was merged with another definition in the current module.
AcceptableWithinParent = false;
do {
if (SemaRef.hasMergedDefinitionInCurrentModule(cast<NamedDecl>(DC))) {
AcceptableWithinParent = true;
break;
}
DC = DC->getLexicalParent();
} while (!IsEffectivelyFileContext(DC));
} else {
AcceptableWithinParent =
SemaRef.hasAcceptableDefinition(cast<NamedDecl>(DC), Kind);
}
if (AcceptableWithinParent && SemaRef.CodeSynthesisContexts.empty() &&
Kind == Sema::AcceptableKind::Visible &&
// FIXME: Do something better in this case.
!SemaRef.getLangOpts().ModulesLocalVisibility) {
// Cache the fact that this declaration is implicitly visible because
// its parent has a visible definition.
D->setVisibleDespiteOwningModule();
}
return AcceptableWithinParent;
}
if (Kind == Sema::AcceptableKind::Visible)
return false;
assert(Kind == Sema::AcceptableKind::Reachable &&
"Additional Sema::AcceptableKind?");
return isReachableSlow(SemaRef, D);
}
bool Sema::isModuleVisible(const Module *M, bool ModulePrivate) {
// [module.global.frag]p2:
// A global-module-fragment specifies the contents of the global module
// fragment for a module unit. The global module fragment can be used to
// provide declarations that are attached to the global module and usable
// within the module unit.
//
// The global module fragment is special: it is only usable within the
// module unit in which it is defined ([module.global.frag]p2). So here we
// check whether the module is the global module fragment of the current
// translation unit.
if (M->isGlobalModule() && M != this->GlobalModuleFragment)
return false;
// The module might be ordinarily visible. For a module-private query, that
// means it is part of the current module.
if (ModulePrivate && isUsableModule(M))
return true;
// For a query which is not module-private, that means it is in our visible
// module set.
if (!ModulePrivate && VisibleModules.isVisible(M))
return true;
// Otherwise, it might be visible by virtue of the query being within a
// template instantiation or similar that is permitted to look inside M.
// Find the extra places where we need to look.
const auto &LookupModules = getLookupModules();
if (LookupModules.empty())
return false;
// If our lookup set contains the module, it's visible.
if (LookupModules.count(M))
return true;
// For a module-private query, that's everywhere we get to look.
if (ModulePrivate)
return false;
// Check whether M is transitively exported to an import of the lookup set.
return llvm::any_of(LookupModules, [&](const Module *LookupM) {
return LookupM->isModuleVisible(M);
});
}
// FIXME: Return false directly if we don't have an interface dependency on the
// translation unit containing D.
bool LookupResult::isReachableSlow(Sema &SemaRef, NamedDecl *D) {
assert(!isVisible(SemaRef, D) && "Shouldn't call the slow case.\n");
Module *DeclModule = SemaRef.getOwningModule(D);
assert(DeclModule && "hidden decl has no owning module");
// Entities in module map modules are reachable only if they're visible.
if (DeclModule->isModuleMapModule())
return false;
// If D comes from a module and SemaRef doesn't own a module, then D must
// come from another TU. If SemaRef does own a module, we can tell whether D
// comes from another TU by comparing the module units.
//
// FIXME: It would be cleaner to have a direct way to ask whether D is in
// another TU.
if (SemaRef.getCurrentModule() &&
SemaRef.getCurrentModule()->getTopLevelModule() ==
DeclModule->getTopLevelModule())
return true;
// [module.reach]/p3:
// A declaration D is reachable from a point P if:
// ...
// - D is not discarded ([module.global.frag]), appears in a translation unit
// that is reachable from P, and does not appear within a private module
// fragment.
//
// A declaration that's discarded in the GMF should be module-private.
if (D->isModulePrivate())
return false;
// [module.reach]/p1
// A translation unit U is necessarily reachable from a point P if U is a
// module interface unit on which the translation unit containing P has an
// interface dependency, or the translation unit containing P imports U, in
// either case prior to P ([module.import]).
//
// [module.import]/p10
// A translation unit has an interface dependency on a translation unit U if
// it contains a declaration (possibly a module-declaration) that imports U
// or if it has an interface dependency on a translation unit that has an
// interface dependency on U.
//
// So we could conclude the module unit U is necessarily reachable if:
// (1) The module unit U is module interface unit.
// (2) The current unit has an interface dependency on the module unit U.
//
// Here we only check the first condition, since we couldn't see
// DeclModule at all if it weren't (transitively) imported.
if (DeclModule->getTopLevelModule()->isModuleInterfaceUnit())
return true;
// [module.reach]/p2
// Additional translation units on
// which the point within the program has an interface dependency may be
// considered reachable, but it is unspecified which are and under what
// circumstances.
//
// The decision here is to treat all additional translation units as
// unreachable.
return false;
}
bool Sema::isAcceptableSlow(const NamedDecl *D, Sema::AcceptableKind Kind) {
return LookupResult::isAcceptable(*this, const_cast<NamedDecl *>(D), Kind);
}
bool Sema::shouldLinkPossiblyHiddenDecl(LookupResult &R, const NamedDecl *New) {
// FIXME: If there are both visible and hidden declarations, we need to take
// into account whether redeclaration is possible. Example:
//
// Non-imported module:
// int f(T); // #1
// Some TU:
// static int f(U); // #2, not a redeclaration of #1
// int f(T); // #3, finds both, should link with #1 if T != U, but
// // with #2 if T == U; neither should be ambiguous.
for (auto *D : R) {
if (isVisible(D))
return true;
assert(D->isExternallyDeclarable() &&
"should not have hidden, non-externally-declarable result here");
}
// This function is called once "New" is essentially complete, but before a
// previous declaration is attached. We can't query the linkage of "New" in
// general, because attaching the previous declaration can change the
// linkage of New to match the previous declaration.
//
// However, because we've just determined that there is no *visible* prior
// declaration, we can compute the linkage here. There are two possibilities:
//
// * This is not a redeclaration; it's safe to compute the linkage now.
//
// * This is a redeclaration of a prior declaration that is externally
// redeclarable. In that case, the linkage of the declaration is not
// changed by attaching the prior declaration, because both are externally
// declarable (and thus ExternalLinkage or VisibleNoLinkage).
//
// FIXME: This is subtle and fragile.
return New->isExternallyDeclarable();
}
/// Retrieve the visible declaration corresponding to D, if any.
///
/// This routine determines whether the declaration D is visible in the current
/// module, with the current imports. If not, it checks whether any
/// redeclaration of D is visible, and if so, returns that declaration.
///
/// \returns D, or a visible previous declaration of D, whichever is more recent
/// and visible. If no declaration of D is visible, returns null.
static NamedDecl *findAcceptableDecl(Sema &SemaRef, NamedDecl *D,
unsigned IDNS) {
assert(!LookupResult::isAvailableForLookup(SemaRef, D) && "not in slow case");
for (auto RD : D->redecls()) {
// Don't bother with extra checks if we already know this one isn't visible.
if (RD == D)
continue;
auto ND = cast<NamedDecl>(RD);
// FIXME: This is wrong in the case where the previous declaration is not
// visible in the same scope as D. This needs to be done much more
// carefully.
if (ND->isInIdentifierNamespace(IDNS) &&
LookupResult::isAvailableForLookup(SemaRef, ND))
return ND;
}
return nullptr;
}
bool Sema::hasVisibleDeclarationSlow(const NamedDecl *D,
llvm::SmallVectorImpl<Module *> *Modules) {
assert(!isVisible(D) && "not in slow case");
return hasAcceptableDeclarationImpl(
*this, D, Modules, [](const NamedDecl *) { return true; },
Sema::AcceptableKind::Visible);
}
bool Sema::hasReachableDeclarationSlow(
const NamedDecl *D, llvm::SmallVectorImpl<Module *> *Modules) {
assert(!isReachable(D) && "not in slow case");
return hasAcceptableDeclarationImpl(
*this, D, Modules, [](const NamedDecl *) { return true; },
Sema::AcceptableKind::Reachable);
}
NamedDecl *LookupResult::getAcceptableDeclSlow(NamedDecl *D) const {
if (auto *ND = dyn_cast<NamespaceDecl>(D)) {
// Namespaces are a bit of a special case: we expect there to be a lot of
// redeclarations of some namespaces, all declarations of a namespace are
// essentially interchangeable, all declarations are found by name lookup
// if any is, and namespaces are never looked up during template
// instantiation. So we benefit from caching the check in this case, and
// it is correct to do so.
auto *Key = ND->getCanonicalDecl();
if (auto *Acceptable = getSema().VisibleNamespaceCache.lookup(Key))
return Acceptable;
auto *Acceptable = isVisible(getSema(), Key)
? Key
: findAcceptableDecl(getSema(), Key, IDNS);
if (Acceptable)
getSema().VisibleNamespaceCache.insert(std::make_pair(Key, Acceptable));
return Acceptable;
}
return findAcceptableDecl(getSema(), D, IDNS);
}
bool LookupResult::isVisible(Sema &SemaRef, NamedDecl *D) {
// If this declaration is already visible, return it directly.
if (D->isUnconditionallyVisible())
return true;
// During template instantiation, we can refer to hidden declarations, if
// they were visible in any module along the path of instantiation.
return isAcceptableSlow(SemaRef, D, Sema::AcceptableKind::Visible);
}
bool LookupResult::isReachable(Sema &SemaRef, NamedDecl *D) {
if (D->isUnconditionallyVisible())
return true;
return isAcceptableSlow(SemaRef, D, Sema::AcceptableKind::Reachable);
}
bool LookupResult::isAvailableForLookup(Sema &SemaRef, NamedDecl *ND) {
// Visibility should already have been checked at the call site.
if (isVisible(SemaRef, ND))
return true;
// Deduction guides are declared at namespace scope generally, but they are
// only a hint to the compiler. What we actually look up is the generated
// member of the corresponding template, so it is sufficient to check the
// reachability of the template declaration.
if (auto *DeductionGuide = ND->getDeclName().getCXXDeductionGuideTemplate())
return SemaRef.hasReachableDefinition(DeductionGuide);
auto *DC = ND->getDeclContext();
// If ND is not visible and it is at namespace scope, it shouldn't be found
// by name lookup.
if (DC->isFileContext())
return false;
// [module.interface]p7
// Class and enumeration member names can be found by name lookup in any
// context in which a definition of the type is reachable.
//
// FIXME: The current implementation doesn't take scope into account. For example,
// ```
// // m.cppm
// export module m;
// enum E1 { e1 };
// // Use.cpp
// import m;
// void test() {
// auto a = E1::e1; // Error as expected.
// auto b = e1; // Should be an error: the namespace-scope name e1 is not visible
// }
// ```
// For the above example, the current implementation correctly emits an
// error for `a`. However, it doesn't diagnose `b`, because we only check
// the reachability of the parent context.
// See clang/test/CXX/module/module.interface/p7.cpp for example.
if (auto *TD = dyn_cast<TagDecl>(DC))
return SemaRef.hasReachableDefinition(TD);
return false;
}
/// Perform unqualified name lookup starting from a given
/// scope.
///
/// Unqualified name lookup (C++ [basic.lookup.unqual], C99 6.2.1) is
/// used to find names within the current scope. For example, 'x' in
/// @code
/// int x;
/// int f() {
/// return x; // unqualified name lookup finds 'x' in the global scope
/// }
/// @endcode
///
/// Different lookup criteria can find different names. For example, a
/// particular scope can have both a struct and a function of the same
/// name, and each can be found by certain lookup criteria. For more
/// information about lookup criteria, see the documentation for the
/// class LookupCriteria.
///
/// @param S The scope from which unqualified name lookup will
/// begin. If the lookup criteria permits, name lookup may also search
/// in the parent scopes.
///
/// @param [in,out] R Specifies the lookup to perform (e.g., the name to
/// look up and the lookup kind), and is updated with the results of lookup
/// including zero or more declarations and possibly additional information
/// used to diagnose ambiguities.
///
/// @returns \c true if lookup succeeded and false otherwise.
bool Sema::LookupName(LookupResult &R, Scope *S, bool AllowBuiltinCreation,
bool ForceNoCPlusPlus) {
DeclarationName Name = R.getLookupName();
if (!Name) return false;
LookupNameKind NameKind = R.getLookupKind();
if (!getLangOpts().CPlusPlus || ForceNoCPlusPlus) {
// Unqualified name lookup in C/Objective-C is purely lexical, so
// search in the declarations attached to the name.
if (NameKind == Sema::LookupRedeclarationWithLinkage) {
// Find the nearest non-transparent declaration scope.
while (!(S->getFlags() & Scope::DeclScope) ||
(S->getEntity() && S->getEntity()->isTransparentContext()))
S = S->getParent();
}
// When performing a scope lookup, we want to find local extern decls.
FindLocalExternScope FindLocals(R);
// Scan up the scope chain looking for a decl that matches this
// identifier that is in the appropriate namespace. This search
// should not take long, as shadowing of names is uncommon, and
// deep shadowing is extremely uncommon.
bool LeftStartingScope = false;
for (IdentifierResolver::iterator I = IdResolver.begin(Name),
IEnd = IdResolver.end();
I != IEnd; ++I)
if (NamedDecl *D = R.getAcceptableDecl(*I)) {
if (NameKind == LookupRedeclarationWithLinkage) {
// Determine whether this (or a previous) declaration is
// out-of-scope.
if (!LeftStartingScope && !S->isDeclScope(*I))
LeftStartingScope = true;
// If we found something outside of our starting scope that
// does not have linkage, skip it.
if (LeftStartingScope && !((*I)->hasLinkage())) {
R.setShadowed();
continue;
}
}
else if (NameKind == LookupObjCImplicitSelfParam &&
!isa<ImplicitParamDecl>(*I))
continue;
R.addDecl(D);
// Check whether there are any other declarations with the same name
// and in the same scope.
if (I != IEnd) {
// Find the scope in which this declaration was declared (if it
// actually exists in a Scope).
while (S && !S->isDeclScope(D))
S = S->getParent();
// If the scope containing the declaration is the translation unit,
// then we'll need to perform our checks based on the matching
// DeclContexts rather than matching scopes.
if (S && isNamespaceOrTranslationUnitScope(S))
S = nullptr;
// Compute the DeclContext, if we need it.
DeclContext *DC = nullptr;
if (!S)
DC = (*I)->getDeclContext()->getRedeclContext();
IdentifierResolver::iterator LastI = I;
for (++LastI; LastI != IEnd; ++LastI) {
if (S) {
// Match based on scope.
if (!S->isDeclScope(*LastI))
break;
} else {
// Match based on DeclContext.
DeclContext *LastDC
= (*LastI)->getDeclContext()->getRedeclContext();
if (!LastDC->Equals(DC))
break;
}
// If the declaration is in the right namespace and visible, add it.
if (NamedDecl *LastD = R.getAcceptableDecl(*LastI))
R.addDecl(LastD);
}
R.resolveKind();
}
return true;
}
} else {
// Perform C++ unqualified name lookup.
if (CppLookupName(R, S))
return true;
}
// If we didn't find a use of this identifier, and if the identifier
// corresponds to a compiler builtin, create the decl object for the builtin
// now, injecting it into translation unit scope, and return it.
if (AllowBuiltinCreation && LookupBuiltin(R))
return true;
// If we didn't find a use of this identifier, the ExternalSource
// may be able to handle the situation.
// Note: some lookup failures are expected!
// See e.g. R.isForRedeclaration().
return (ExternalSource && ExternalSource->LookupUnqualified(R, S));
}
/// Perform qualified name lookup in the namespaces nominated by
/// using directives by the given context.
///
/// C++98 [namespace.qual]p2:
/// Given X::m (where X is a user-declared namespace), or given \::m
/// (where X is the global namespace), let S be the set of all
/// declarations of m in X and in the transitive closure of all
/// namespaces nominated by using-directives in X and its used
/// namespaces, except that using-directives are ignored in any
/// namespace, including X, directly containing one or more
/// declarations of m. No namespace is searched more than once in
/// the lookup of a name. If S is the empty set, the program is
/// ill-formed. Otherwise, if S has exactly one member, or if the
/// context of the reference is a using-declaration
/// (namespace.udecl), S is the required set of declarations of
/// m. Otherwise if the use of m is not one that allows a unique
/// declaration to be chosen from S, the program is ill-formed.
///
/// C++98 [namespace.qual]p5:
/// During the lookup of a qualified namespace member name, if the
/// lookup finds more than one declaration of the member, and if one
/// declaration introduces a class name or enumeration name and the
/// other declarations either introduce the same object, the same
/// enumerator or a set of functions, the non-type name hides the
/// class or enumeration name if and only if the declarations are
/// from the same namespace; otherwise (the declarations are from
/// different namespaces), the program is ill-formed.
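///
/// For example (an illustrative sketch, not text from the standard):
/// @code
/// namespace A { struct X {}; }
/// namespace B { void X(); }
/// namespace N { using namespace A; using namespace B; }
/// N::X *p; // ill-formed: the tag A::X and the function B::X are found
///          // from different namespaces, so the function does not hide
///          // the tag and the lookup is ambiguous
/// @endcode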
static bool LookupQualifiedNameInUsingDirectives(Sema &S, LookupResult &R,
DeclContext *StartDC) {
assert(StartDC->isFileContext() && "start context is not a file context");
// We have not yet looked into these namespaces, much less added
// their "using-children" to the queue.
SmallVector<NamespaceDecl*, 8> Queue;
// We have at least added all these contexts to the queue.
llvm::SmallPtrSet<DeclContext*, 8> Visited;
Visited.insert(StartDC);
// We have already looked into the initial namespace; seed the queue
// with its using-children.
for (auto *I : StartDC->using_directives()) {
NamespaceDecl *ND = I->getNominatedNamespace()->getOriginalNamespace();
if (S.isVisible(I) && Visited.insert(ND).second)
Queue.push_back(ND);
}
// The easiest way to implement the restriction in [namespace.qual]p5
// is to check whether any of the individual results found a tag
// and, if so, to declare an ambiguity if the final result is not
// a tag.
bool FoundTag = false;
bool FoundNonTag = false;
LookupResult LocalR(LookupResult::Temporary, R);
bool Found = false;
while (!Queue.empty()) {
NamespaceDecl *ND = Queue.pop_back_val();
// We go through some convolutions here to avoid copying results
// between LookupResults.
bool UseLocal = !R.empty();
LookupResult &DirectR = UseLocal ? LocalR : R;
bool FoundDirect = LookupDirect(S, DirectR, ND);
if (FoundDirect) {
// First do any local hiding.
DirectR.resolveKind();
// If the local result is a tag, remember that.
if (DirectR.isSingleTagDecl())
FoundTag = true;
else
FoundNonTag = true;
// Append the local results to the total results if necessary.
if (UseLocal) {
R.addAllDecls(LocalR);
LocalR.clear();
}
}
// If we find names in this namespace, ignore its using directives.
if (FoundDirect) {
Found = true;
continue;
}
for (auto I : ND->using_directives()) {
NamespaceDecl *Nom = I->getNominatedNamespace();
if (S.isVisible(I) && Visited.insert(Nom).second)
Queue.push_back(Nom);
}
}
if (Found) {
if (FoundTag && FoundNonTag)
R.setAmbiguousQualifiedTagHiding();
else
R.resolveKind();
}
return Found;
}
/// Perform qualified name lookup into a given context.
///
/// Qualified name lookup (C++ [basic.lookup.qual]) is used to find
/// names when the context of those names is explicitly specified, e.g.,
/// "std::vector" or "x->member", or as part of unqualified name lookup.
///
/// Different lookup criteria can find different names. For example, a
/// particular scope can have both a struct and a function of the same
/// name, and each can be found by certain lookup criteria. For more
/// information about lookup criteria, see the documentation for the
/// class LookupCriteria.
///
/// \param R captures both the lookup criteria and any lookup results found.
///
/// \param LookupCtx The context in which qualified name lookup will
/// search. If the lookup criteria permits, name lookup may also search
/// in the parent contexts or (for C++ classes) base classes.
///
/// \param InUnqualifiedLookup true if this is qualified name lookup that
/// occurs as part of unqualified name lookup.
///
/// \returns true if lookup succeeded, false if it failed.
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
bool InUnqualifiedLookup) {
assert(LookupCtx && "Sema::LookupQualifiedName requires a lookup context");
if (!R.getLookupName())
return false;
// Make sure that the declaration context is complete.
assert((!isa<TagDecl>(LookupCtx) ||
LookupCtx->isDependentContext() ||
cast<TagDecl>(LookupCtx)->isCompleteDefinition() ||
cast<TagDecl>(LookupCtx)->isBeingDefined()) &&
"Declaration context must already be complete!");
struct QualifiedLookupInScope {
bool oldVal;
DeclContext *Context;
// Set a flag in the DeclContext informing the debugger that we're doing a qualified-name lookup
QualifiedLookupInScope(DeclContext *ctx) : Context(ctx) {
oldVal = ctx->setUseQualifiedLookup();
}
~QualifiedLookupInScope() {
Context->setUseQualifiedLookup(oldVal);
}
} QL(LookupCtx);
if (LookupDirect(*this, R, LookupCtx)) {
R.resolveKind();
if (isa<CXXRecordDecl>(LookupCtx))
R.setNamingClass(cast<CXXRecordDecl>(LookupCtx));
return true;
}
// Don't descend into implied contexts for redeclarations.
// C++98 [namespace.qual]p6:
// In a declaration for a namespace member in which the
// declarator-id is a qualified-id, given that the qualified-id
// for the namespace member has the form
// nested-name-specifier unqualified-id
// the unqualified-id shall name a member of the namespace
// designated by the nested-name-specifier.
// See also [class.mfct]p5 and [class.static.data]p2.
if (R.isForRedeclaration())
return false;
// If this is a namespace, look it up in the implied namespaces.
if (LookupCtx->isFileContext())
return LookupQualifiedNameInUsingDirectives(*this, R, LookupCtx);
// If this isn't a C++ class, we aren't allowed to look into base
// classes; we're done.
CXXRecordDecl *LookupRec = dyn_cast<CXXRecordDecl>(LookupCtx);
if (!LookupRec || !LookupRec->getDefinition())
return false;
// We're done for lookups that can never succeed for C++ classes.
if (R.getLookupKind() == LookupOperatorName ||
R.getLookupKind() == LookupNamespaceName ||
R.getLookupKind() == LookupObjCProtocolName ||
R.getLookupKind() == LookupLabel)
return false;
// If we're performing qualified name lookup into a dependent class,
// then we are actually looking into a current instantiation. If we have any
// dependent base classes, then we either have to delay lookup until
// template instantiation time (at which point all bases will be available)
// or we have to fail.
if (!InUnqualifiedLookup && LookupRec->isDependentContext() &&
LookupRec->hasAnyDependentBases()) {
R.setNotFoundInCurrentInstantiation();
return false;
}
// Perform lookup into our base classes.
DeclarationName Name = R.getLookupName();
unsigned IDNS = R.getIdentifierNamespace();
// Look for this member in our base classes.
auto BaseCallback = [Name, IDNS](const CXXBaseSpecifier *Specifier,
CXXBasePath &Path) -> bool {
CXXRecordDecl *BaseRecord = Specifier->getType()->getAsCXXRecordDecl();
// Drop leading non-matching lookup results from the declaration list so
// we don't need to consider them again below.
for (Path.Decls = BaseRecord->lookup(Name).begin();
Path.Decls != Path.Decls.end(); ++Path.Decls) {
if ((*Path.Decls)->isInIdentifierNamespace(IDNS))
return true;
}
return false;
};
CXXBasePaths Paths;
Paths.setOrigin(LookupRec);
if (!LookupRec->lookupInBases(BaseCallback, Paths))
return false;
R.setNamingClass(LookupRec);
// C++ [class.member.lookup]p2:
// [...] If the resulting set of declarations are not all from
// sub-objects of the same type, or the set has a nonstatic member
// and includes members from distinct sub-objects, there is an
// ambiguity and the program is ill-formed. Otherwise that set is
// the result of the lookup.
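//
// For example (an illustrative sketch):
//   struct A { int m; };
//   struct B : A {};
//   struct C : A {};
//   struct D : B, C {};
//   int f(D d) { return d.m; } // error: 'm' is found in two distinct
//                              // A subobjects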
QualType SubobjectType;
int SubobjectNumber = 0;
AccessSpecifier SubobjectAccess = AS_none;
// Check whether the given lookup result contains only static members.
auto HasOnlyStaticMembers = [&](DeclContext::lookup_iterator Result) {
for (DeclContext::lookup_iterator I = Result, E = I.end(); I != E; ++I)
if ((*I)->isInIdentifierNamespace(IDNS) && (*I)->isCXXInstanceMember())
return false;
return true;
};
bool TemplateNameLookup = R.isTemplateNameLookup();
// Determine whether two sets of members contain the same members, as
// required by C++ [class.member.lookup]p6.
auto HasSameDeclarations = [&](DeclContext::lookup_iterator A,
DeclContext::lookup_iterator B) {
using Iterator = DeclContextLookupResult::iterator;
using Result = const void *;
auto Next = [&](Iterator &It, Iterator End) -> Result {
while (It != End) {
NamedDecl *ND = *It++;
if (!ND->isInIdentifierNamespace(IDNS))
continue;
// C++ [temp.local]p3:
// A lookup that finds an injected-class-name (10.2) can result in
// an ambiguity in certain cases (for example, if it is found in
// more than one base class). If all of the injected-class-names
// that are found refer to specializations of the same class
// template, and if the name is used as a template-name, the
// reference refers to the class template itself and not a
// specialization thereof, and is not ambiguous.
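// For example (an illustrative sketch):
//   template <typename T> struct Base {};
//   struct D : Base<int>, Base<float> {};
// Lookup of 'Base' as a template-name inside D finds two
// injected-class-names, both referring to specializations of the same
// template, so the result is the template Base itself, unambiguously.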
if (TemplateNameLookup)
if (auto *TD = getAsTemplateNameDecl(ND))
ND = TD;
// C++ [class.member.lookup]p3:
// type declarations (including injected-class-names) are replaced by
// the types they designate
if (const TypeDecl *TD = dyn_cast<TypeDecl>(ND->getUnderlyingDecl())) {
QualType T = Context.getTypeDeclType(TD);
return T.getCanonicalType().getAsOpaquePtr();
}
return ND->getUnderlyingDecl()->getCanonicalDecl();
}
return nullptr;
};
// We'll often find the declarations are in the same order. Handle this
// case (and the special case of only one declaration) efficiently.
Iterator AIt = A, BIt = B, AEnd, BEnd;
while (true) {
Result AResult = Next(AIt, AEnd);
Result BResult = Next(BIt, BEnd);
if (!AResult && !BResult)
return true;
if (!AResult || !BResult)
return false;
if (AResult != BResult) {
// Found a mismatch; carefully check both lists, accounting for the
// possibility of declarations appearing more than once.
llvm::SmallDenseMap<Result, bool, 32> AResults;
for (; AResult; AResult = Next(AIt, AEnd))
AResults.insert({AResult, /*FoundInB*/false});
unsigned Found = 0;
for (; BResult; BResult = Next(BIt, BEnd)) {
auto It = AResults.find(BResult);
if (It == AResults.end())
return false;
if (!It->second) {
It->second = true;
++Found;
}
}
return AResults.size() == Found;
}
}
};
for (CXXBasePaths::paths_iterator Path = Paths.begin(), PathEnd = Paths.end();
Path != PathEnd; ++Path) {
const CXXBasePathElement &PathElement = Path->back();
// Pick the best (i.e., most permissive, i.e., numerically lowest) access
// across all paths.
SubobjectAccess = std::min(SubobjectAccess, Path->Access);
// Determine whether we're looking at a distinct sub-object or not.
if (SubobjectType.isNull()) {
// This is the first subobject we've looked at. Record its type.
SubobjectType = Context.getCanonicalType(PathElement.Base->getType());
SubobjectNumber = PathElement.SubobjectNumber;
continue;
}
if (SubobjectType !=
Context.getCanonicalType(PathElement.Base->getType())) {
// We found members of the given name in two subobjects of
// different types. If the declaration sets aren't the same, this
// lookup is ambiguous.
//
// FIXME: The language rule says that this applies irrespective of
// whether the sets contain only static members.
if (HasOnlyStaticMembers(Path->Decls) &&
HasSameDeclarations(Paths.begin()->Decls, Path->Decls))
continue;
R.setAmbiguousBaseSubobjectTypes(Paths);
return true;
}
// FIXME: This language rule no longer exists. Checking for ambiguous base
// subobjects should be done as part of formation of a class member access
// expression (when converting the object parameter to the member's type).
if (SubobjectNumber != PathElement.SubobjectNumber) {
// We have a different subobject of the same type.
// C++ [class.member.lookup]p5:
// A static member, a nested type or an enumerator defined in
// a base class T can unambiguously be found even if an object
// has more than one base class subobject of type T.
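//
// For example (an illustrative sketch):
//   struct A { static int s; };
//   struct B : A {};
//   struct C : A {};
//   struct D : B, C {};
//   int g(D d) { return d.s; } // OK: A::s is static, so the two A
//                              // subobjects introduce no ambiguity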
if (HasOnlyStaticMembers(Path->Decls))
continue;
// We have found a nonstatic member name in multiple, distinct
// subobjects. Name lookup is ambiguous.
R.setAmbiguousBaseSubobjects(Paths);
return true;
}
}
// Lookup in a base class succeeded; return these results.
for (DeclContext::lookup_iterator I = Paths.front().Decls, E = I.end();
I != E; ++I) {
AccessSpecifier AS = CXXRecordDecl::MergeAccess(SubobjectAccess,
(*I)->getAccess());
if (NamedDecl *ND = R.getAcceptableDecl(*I))
R.addDecl(ND, AS);
}
R.resolveKind();
return true;
}
/// Performs qualified name lookup, or a special kind of lookup for the
/// "__super::" scope specifier.
///
/// This routine is a convenience overload meant to be called from contexts
/// that need to perform a qualified name lookup with an optional C++ scope
/// specifier that might require a special kind of lookup.
///
/// \param R captures both the lookup criteria and any lookup results found.
///
/// \param LookupCtx The context in which qualified name lookup will
/// search.
///
/// \param SS An optional C++ scope-specifier.
///
/// \returns true if lookup succeeded, false if it failed.
bool Sema::LookupQualifiedName(LookupResult &R, DeclContext *LookupCtx,
CXXScopeSpec &SS) {
auto *NNS = SS.getScopeRep();
if (NNS && NNS->getKind() == NestedNameSpecifier::Super)
return LookupInSuper(R, NNS->getAsRecordDecl());
else
return LookupQualifiedName(R, LookupCtx);
}
/// Performs name lookup for a name that was parsed in the
/// source code, and may contain a C++ scope specifier.
///
/// This routine is a convenience routine meant to be called from
/// contexts that receive a name and an optional C++ scope specifier
/// (e.g., "N::M::x"). It will then perform either qualified or
/// unqualified name lookup (with LookupQualifiedName or LookupName,
/// respectively) on the given name and return those results. It will
/// perform a special kind of lookup for the "__super::" scope specifier.
///
/// @param S The scope from which unqualified name lookup will
/// begin.
///
/// @param SS An optional C++ scope-specifier, e.g., "::N::M".
///
/// @param EnteringContext Indicates whether we are going to enter the
/// context of the scope-specifier SS (if present).
///
/// @returns True if any decls were found (but possibly ambiguous)
bool Sema::LookupParsedName(LookupResult &R, Scope *S, CXXScopeSpec *SS,
bool AllowBuiltinCreation, bool EnteringContext) {
if (SS && SS->isInvalid()) {
// When the scope specifier is invalid, don't even look for
// anything.
return false;
}
if (SS && SS->isSet()) {
NestedNameSpecifier *NNS = SS->getScopeRep();
if (NNS->getKind() == NestedNameSpecifier::Super)
return LookupInSuper(R, NNS->getAsRecordDecl());
if (DeclContext *DC = computeDeclContext(*SS, EnteringContext)) {
// We have resolved the scope specifier to a particular declaration
// context, and will perform name lookup in that context.
if (!DC->isDependentContext() && RequireCompleteDeclContext(*SS, DC))
return false;
R.setContextRange(SS->getRange());
return LookupQualifiedName(R, DC);
}
// We could not resolve the scope specifier to a specific declaration
// context, which means that SS refers to an unknown specialization.
// Name lookup can't find anything in this case.
R.setNotFoundInCurrentInstantiation();
R.setContextRange(SS->getRange());
return false;
}
// Perform unqualified name lookup starting in the given scope.
return LookupName(R, S, AllowBuiltinCreation);
}
/// Perform qualified name lookup into all base classes of the given
/// class.
///
/// \param R captures both the lookup criteria and any lookup results found.
///
/// \param Class The context in which qualified name lookup will
/// search. Name lookup will search in all base classes merging the results.
///
/// @returns True if any decls were found (but possibly ambiguous)
bool Sema::LookupInSuper(LookupResult &R, CXXRecordDecl *Class) {
// The access-control rules we use here are essentially the rules for
// doing a lookup in Class that just magically skipped the direct
// members of Class itself. That is, the naming class is Class, and the
// access includes the access of the base.
for (const auto &BaseSpec : Class->bases()) {
CXXRecordDecl *RD = cast<CXXRecordDecl>(
BaseSpec.getType()->castAs<RecordType>()->getDecl());
LookupResult Result(*this, R.getLookupNameInfo(), R.getLookupKind());
Result.setBaseObjectType(Context.getRecordType(Class));
LookupQualifiedName(Result, RD);
// Copy the lookup results into the target, merging the base's access into
// the path access.
for (auto I = Result.begin(), E = Result.end(); I != E; ++I) {
R.addDecl(I.getDecl(),
CXXRecordDecl::MergeAccess(BaseSpec.getAccessSpecifier(),
I.getAccess()));
}
Result.suppressDiagnostics();
}
R.resolveKind();
R.setNamingClass(Class);
return !R.empty();
}
/// Produce a diagnostic describing the ambiguity that resulted
/// from name lookup.
///
/// \param Result The result of the ambiguous lookup to be diagnosed.
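///
/// For example (an illustrative sketch of an ambiguous reference):
/// @code
/// namespace A { int i; }
/// namespace B { int i; }
/// using namespace A;
/// using namespace B;
/// int j = i; // ambiguous: A::i or B::i
/// @endcode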
void Sema::DiagnoseAmbiguousLookup(LookupResult &Result) {
assert(Result.isAmbiguous() && "Lookup result must be ambiguous");
DeclarationName Name = Result.getLookupName();
SourceLocation NameLoc = Result.getNameLoc();
SourceRange LookupRange = Result.getContextRange();
switch (Result.getAmbiguityKind()) {
case LookupResult::AmbiguousBaseSubobjects: {
CXXBasePaths *Paths = Result.getBasePaths();
QualType SubobjectType = Paths->front().back().Base->getType();
Diag(NameLoc, diag::err_ambiguous_member_multiple_subobjects)
<< Name << SubobjectType << getAmbiguousPathsDisplayString(*Paths)
<< LookupRange;
DeclContext::lookup_iterator Found = Paths->front().Decls;
while (isa<CXXMethodDecl>(*Found) &&
cast<CXXMethodDecl>(*Found)->isStatic())
++Found;
Diag((*Found)->getLocation(), diag::note_ambiguous_member_found);
break;
}
case LookupResult::AmbiguousBaseSubobjectTypes: {
Diag(NameLoc, diag::err_ambiguous_member_multiple_subobject_types)
<< Name << LookupRange;
CXXBasePaths *Paths = Result.getBasePaths();
std::set<const NamedDecl *> DeclsPrinted;
for (CXXBasePaths::paths_iterator Path = Paths->begin(),
PathEnd = Paths->end();
Path != PathEnd; ++Path) {
const NamedDecl *D = *Path->Decls;
if (!D->isInIdentifierNamespace(Result.getIdentifierNamespace()))
continue;
if (DeclsPrinted.insert(D).second) {
if (const auto *TD = dyn_cast<TypedefNameDecl>(D->getUnderlyingDecl()))
Diag(D->getLocation(), diag::note_ambiguous_member_type_found)
<< TD->getUnderlyingType();
else if (const auto *TD = dyn_cast<TypeDecl>(D->getUnderlyingDecl()))
Diag(D->getLocation(), diag::note_ambiguous_member_type_found)
<< Context.getTypeDeclType(TD);
else
Diag(D->getLocation(), diag::note_ambiguous_member_found);
}
}
break;
}
case LookupResult::AmbiguousTagHiding: {
Diag(NameLoc, diag::err_ambiguous_tag_hiding) << Name << LookupRange;
llvm::SmallPtrSet<NamedDecl*, 8> TagDecls;
for (auto *D : Result)
if (TagDecl *TD = dyn_cast<TagDecl>(D)) {
TagDecls.insert(TD);
Diag(TD->getLocation(), diag::note_hidden_tag);
}
for (auto *D : Result)
if (!isa<TagDecl>(D))
Diag(D->getLocation(), diag::note_hiding_object);
// For recovery purposes, go ahead and implement the hiding.
LookupResult::Filter F = Result.makeFilter();
while (F.hasNext()) {
if (TagDecls.count(F.next()))
F.erase();
}
F.done();
break;
}
case LookupResult::AmbiguousReference: {
Diag(NameLoc, diag::err_ambiguous_reference) << Name << LookupRange;
for (auto *D : Result)
Diag(D->getLocation(), diag::note_ambiguous_candidate) << D;
break;
}
}
}
namespace {
struct AssociatedLookup {
AssociatedLookup(Sema &S, SourceLocation InstantiationLoc,
Sema::AssociatedNamespaceSet &Namespaces,
Sema::AssociatedClassSet &Classes)
: S(S), Namespaces(Namespaces), Classes(Classes),
InstantiationLoc(InstantiationLoc) {
}
bool addClassTransitive(CXXRecordDecl *RD) {
Classes.insert(RD);
return ClassesTransitive.insert(RD);
}
Sema &S;
Sema::AssociatedNamespaceSet &Namespaces;
Sema::AssociatedClassSet &Classes;
SourceLocation InstantiationLoc;
private:
Sema::AssociatedClassSet ClassesTransitive;
};
} // end anonymous namespace
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType T);
// Given the declaration context \param Ctx of a class, class template or
// enumeration, add the associated namespaces to \param Namespaces as described
// in [basic.lookup.argdep]p2.
static void CollectEnclosingNamespace(Sema::AssociatedNamespaceSet &Namespaces,
DeclContext *Ctx) {
// The exact wording has been changed in C++14 as a result of
// CWG 1691 (see also CWG 1690 and CWG 1692). We apply it unconditionally
// to all language versions since it is possible to return a local type
// from a lambda in C++11.
//
// C++14 [basic.lookup.argdep]p2:
// If T is a class type [...]. Its associated namespaces are the innermost
// enclosing namespaces of its associated classes. [...]
//
// If T is an enumeration type, its associated namespace is the innermost
// enclosing namespace of its declaration. [...]
// We additionally skip inline namespaces. The innermost non-inline namespace
// contains all names of all its nested inline namespaces anyway, so we can
// replace the entire inline namespace tree with its root.
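//
// For example (an illustrative sketch):
//   namespace N { inline namespace V1 { struct S {}; void f(S); } }
// For an argument of type N::V1::S, the associated namespace recorded
// here is N, the innermost non-inline enclosing namespace; N::V1::f is
// still found, since members of V1 are visible in N.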
while (!Ctx->isFileContext() || Ctx->isInlineNamespace())
Ctx = Ctx->getParent();
Namespaces.insert(Ctx->getPrimaryContext());
}
// Add the associated classes and namespaces for argument-dependent
// lookup that involves a template argument (C++ [basic.lookup.argdep]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
const TemplateArgument &Arg) {
// C++ [basic.lookup.argdep]p2, last bullet:
// -- [...] ;
switch (Arg.getKind()) {
case TemplateArgument::Null:
break;
case TemplateArgument::Type:
// [...] the namespaces and classes associated with the types of the
// template arguments provided for template type parameters (excluding
// template template parameters)
addAssociatedClassesAndNamespaces(Result, Arg.getAsType());
break;
case TemplateArgument::Template:
case TemplateArgument::TemplateExpansion: {
// [...] the namespaces in which any template template arguments are
// defined; and the classes in which any member templates used as
// template template arguments are defined.
TemplateName Template = Arg.getAsTemplateOrTemplatePattern();
if (ClassTemplateDecl *ClassTemplate
= dyn_cast<ClassTemplateDecl>(Template.getAsTemplateDecl())) {
DeclContext *Ctx = ClassTemplate->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
}
break;
}
case TemplateArgument::Declaration:
case TemplateArgument::Integral:
case TemplateArgument::Expression:
case TemplateArgument::NullPtr:
// [Note: non-type template arguments do not contribute to the set of
// associated namespaces. ]
break;
case TemplateArgument::Pack:
for (const auto &P : Arg.pack_elements())
addAssociatedClassesAndNamespaces(Result, P);
break;
}
}
// Add the associated classes and namespaces for argument-dependent lookup
// with an argument of class type (C++ [basic.lookup.argdep]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result,
CXXRecordDecl *Class) {
// Just silently ignore anything whose name is __va_list_tag.
if (Class->getDeclName() == Result.S.VAListTagName)
return;
// C++ [basic.lookup.argdep]p2:
// [...]
// -- If T is a class type (including unions), its associated
// classes are: the class itself; the class of which it is a
// member, if any; and its direct and indirect base classes.
// Its associated namespaces are the innermost enclosing
// namespaces of its associated classes.
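//
// For example (an illustrative sketch):
//   namespace N { struct S {}; void f(S); }
//   void g() { N::S s; f(s); } // unqualified 'f' is found in N via ADL
//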
// Add the class of which it is a member, if any.
DeclContext *Ctx = Class->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
// -- If T is a template-id, its associated namespaces and classes are
// the namespace in which the template is defined; for member
// templates, the member template's class; the namespaces and classes
// associated with the types of the template arguments provided for
// template type parameters (excluding template template parameters); the
// namespaces in which any template template arguments are defined; and
// the classes in which any member templates used as template template
// arguments are defined. [Note: non-type template arguments do not
// contribute to the set of associated namespaces. ]
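//
// For example (an illustrative sketch):
//   namespace M { struct A {}; }
//   namespace N { template <typename T> struct V {}; }
// An argument of type N::V<M::A> associates both N (the template's
// namespace) and M (from the template argument).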
if (ClassTemplateSpecializationDecl *Spec
= dyn_cast<ClassTemplateSpecializationDecl>(Class)) {
DeclContext *Ctx = Spec->getSpecializedTemplate()->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this class.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
const TemplateArgumentList &TemplateArgs = Spec->getTemplateArgs();
for (unsigned I = 0, N = TemplateArgs.size(); I != N; ++I)
addAssociatedClassesAndNamespaces(Result, TemplateArgs[I]);
}
// Add the class itself. If we've already transitively visited this class,
// we don't need to visit base classes.
if (!Result.addClassTransitive(Class))
return;
// Only recurse into base classes for complete types.
if (!Result.S.isCompleteType(Result.InstantiationLoc,
Result.S.Context.getRecordType(Class)))
return;
// Add direct and indirect base classes along with their associated
// namespaces.
SmallVector<CXXRecordDecl *, 32> Bases;
Bases.push_back(Class);
while (!Bases.empty()) {
// Pop this class off the stack.
Class = Bases.pop_back_val();
// Visit the base classes.
for (const auto &Base : Class->bases()) {
const RecordType *BaseType = Base.getType()->getAs<RecordType>();
// In dependent contexts, we do ADL twice, and the first time around,
// the base type might be a dependent TemplateSpecializationType, or a
// TemplateTypeParmType. If that happens, simply ignore it.
// FIXME: If we want to support export, we probably need to add the
// namespace of the template in a TemplateSpecializationType, or even
// the classes and namespaces of known non-dependent arguments.
if (!BaseType)
continue;
CXXRecordDecl *BaseDecl = cast<CXXRecordDecl>(BaseType->getDecl());
if (Result.addClassTransitive(BaseDecl)) {
// Find the associated namespace for this base class.
DeclContext *BaseCtx = BaseDecl->getDeclContext();
CollectEnclosingNamespace(Result.Namespaces, BaseCtx);
// Make sure we visit the bases of this base class.
if (BaseDecl->bases_begin() != BaseDecl->bases_end())
Bases.push_back(BaseDecl);
}
}
}
}
// Add the associated classes and namespaces for
// argument-dependent lookup with an argument of type T
// (C++ [basic.lookup.koenig]p2).
static void
addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) {
// C++ [basic.lookup.koenig]p2:
//
// For each argument type T in the function call, there is a set
// of zero or more associated namespaces and a set of zero or more
// associated classes to be considered. The sets of namespaces and
// classes is determined entirely by the types of the function
// arguments (and the namespace of any template template
// argument). Typedef names and using-declarations used to specify
// the types do not contribute to this set. The sets of namespaces
// and classes are determined in the following way:
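//
// For example (an illustrative sketch), a typedef used to spell an
// argument type contributes nothing by itself:
//   namespace N { struct S {}; }
//   typedef N::S T;
// An argument of type T still associates namespace N, because the
// canonical underlying type is N::S.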
SmallVector<const Type *, 16> Queue;
const Type *T = Ty->getCanonicalTypeInternal().getTypePtr();
while (true) {
switch (T->getTypeClass()) {
#define TYPE(Class, Base)
#define DEPENDENT_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_TYPE(Class, Base) case Type::Class:
#define NON_CANONICAL_UNLESS_DEPENDENT_TYPE(Class, Base) case Type::Class:
#define ABSTRACT_TYPE(Class, Base)
#include "clang/AST/TypeNodes.inc"
// T is canonical. We can also ignore dependent types because
// we don't need to do ADL at the definition point, but if we
// wanted to implement template export (or if we find some other
// use for associated classes and namespaces...) this would be
// wrong.
break;
// -- If T is a pointer to U or an array of U, its associated
// namespaces and classes are those associated with U.
case Type::Pointer:
T = cast<PointerType>(T)->getPointeeType().getTypePtr();
continue;
case Type::ConstantArray:
case Type::IncompleteArray:
case Type::VariableArray:
T = cast<ArrayType>(T)->getElementType().getTypePtr();
continue;
// -- If T is a fundamental type, its associated sets of
// namespaces and classes are both empty.
case Type::Builtin:
break;
// -- If T is a class type (including unions), its associated
// classes are: the class itself; the class of which it is
// a member, if any; and its direct and indirect base classes.
// Its associated namespaces are the innermost enclosing
// namespaces of its associated classes.
case Type::Record: {
CXXRecordDecl *Class =
cast<CXXRecordDecl>(cast<RecordType>(T)->getDecl());
addAssociatedClassesAndNamespaces(Result, Class);
break;
}
// -- If T is an enumeration type, its associated namespace
// is the innermost enclosing namespace of its declaration.
// If it is a class member, its associated class is the
// member's class; else it has no associated class.
case Type::Enum: {
EnumDecl *Enum = cast<EnumType>(T)->getDecl();
DeclContext *Ctx = Enum->getDeclContext();
if (CXXRecordDecl *EnclosingClass = dyn_cast<CXXRecordDecl>(Ctx))
Result.Classes.insert(EnclosingClass);
// Add the associated namespace for this enumeration.
CollectEnclosingNamespace(Result.Namespaces, Ctx);
break;
}
// -- If T is a function type, its associated namespaces and
// classes are those associated with the function parameter
// types and those associated with the return type.
case Type::FunctionProto: {
const FunctionProtoType *Proto = cast<FunctionProtoType>(T);
for (const auto &Arg : Proto->param_types())
Queue.push_back(Arg.getTypePtr());
// fallthrough
LLVM_FALLTHROUGH;
}
case Type::FunctionNoProto: {
const FunctionType *FnType = cast<FunctionType>(T);
T = FnType->getReturnType().getTypePtr();
continue;
}
// -- If T is a pointer to a member function of a class X, its
// associated namespaces and classes are those associated
// with the function parameter types and return type,
// together with those associated with X.
//
// -- If T is a pointer to a data member of class X, its
// associated namespaces and classes are those associated
// with the member type together with those associated with
// X.
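//
// For example (an illustrative sketch):
//   namespace N { struct X {}; struct Y {}; }
// An argument of type 'N::Y (N::X::*)(N::Y)' associates the classes
// X and Y and, through them, the namespace N.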
case Type::MemberPointer: {
const MemberPointerType *MemberPtr = cast<MemberPointerType>(T);
// Queue up the class type into which this points.
Queue.push_back(MemberPtr->getClass());
// And directly continue with the pointee type.
T = MemberPtr->getPointeeType().getTypePtr();
continue;
}
// As an extension, treat this like a normal pointer.
case Type::BlockPointer:
T = cast<BlockPointerType>(T)->getPointeeType().getTypePtr();
continue;
// References aren't covered by the standard, but that's such an
// obvious defect that we cover them anyway.
case Type::LValueReference:
case Type::RValueReference:
T = cast<ReferenceType>(T)->getPointeeType().getTypePtr();
continue;
// These are fundamental types.
case Type::Vector:
case Type::ExtVector:
case Type::ConstantMatrix:
case Type::Complex:
case Type::BitInt:
break;
// Non-deduced auto types only get here for error cases.
case Type::Auto:
case Type::DeducedTemplateSpecialization:
break;
// If T is an Objective-C object or interface type, or a pointer to an
// object or interface type, the associated namespace is the global
// namespace.
case Type::ObjCObject:
case Type::ObjCInterface:
case Type::ObjCObjectPointer:
Result.Namespaces.insert(Result.S.Context.getTranslationUnitDecl());
break;
// Atomic types are just wrappers; use the associations of the
// contained type.
case Type::Atomic:
T = cast<AtomicType>(T)->getValueType().getTypePtr();
continue;
case Type::Pipe:
T = cast<PipeType>(T)->getElementType().getTypePtr();
continue;
}
if (Queue.empty())
break;
T = Queue.pop_back_val();
}
}
/// Find the associated classes and namespaces for
/// argument-dependent lookup for a call with the given set of
/// arguments.
///
/// This routine computes the sets of associated classes and associated
/// namespaces searched by argument-dependent lookup
/// (C++ [basic.lookup.argdep]) for a given set of arguments.
void Sema::FindAssociatedClassesAndNamespaces(
SourceLocation InstantiationLoc, ArrayRef<Expr *> Args,
AssociatedNamespaceSet &AssociatedNamespaces,
AssociatedClassSet &AssociatedClasses) {
AssociatedNamespaces.clear();
AssociatedClasses.clear();
AssociatedLookup Result(*this, InstantiationLoc,
AssociatedNamespaces, AssociatedClasses);
// C++ [basic.lookup.koenig]p2:
// For each argument type T in the function call, there is a set
// of zero or more associated namespaces and a set of zero or more
// associated classes to be considered. The sets of namespaces and
// classes is determined entirely by the types of the function
// arguments (and the namespace of any template template
// argument).
for (unsigned ArgIdx = 0; ArgIdx != Args.size(); ++ArgIdx) {
Expr *Arg = Args[ArgIdx];
if (Arg->getType() != Context.OverloadTy) {
addAssociatedClassesAndNamespaces(Result, Arg->getType());
continue;
}
// [...] In addition, if the argument is the name or address of a
// set of overloaded functions and/or function templates, its
// associated classes and namespaces are the union of those
// associated with each of the members of the set: the namespace
// in which the function or function template is defined and the
// classes and namespaces associated with its (non-dependent)
// parameter types and return type.
OverloadExpr *OE = OverloadExpr::find(Arg).Expression;
for (const NamedDecl *D : OE->decls()) {
// Look through any using declarations to find the underlying function.
const FunctionDecl *FDecl = D->getUnderlyingDecl()->getAsFunction();
// Add the classes and namespaces associated with the parameter
// types and return type of this function.
addAssociatedClassesAndNamespaces(Result, FDecl->getType());
}
}
}
NamedDecl *Sema::LookupSingleName(Scope *S, DeclarationName Name,
SourceLocation Loc,
LookupNameKind NameKind,
RedeclarationKind Redecl) {
LookupResult R(*this, Name, Loc, NameKind, Redecl);
LookupName(R, S);
return R.getAsSingle<NamedDecl>();
}
/// Find the protocol with the given name, if any.
ObjCProtocolDecl *Sema::LookupProtocol(IdentifierInfo *II,
SourceLocation IdLoc,
RedeclarationKind Redecl) {
Decl *D = LookupSingleName(TUScope, II, IdLoc,
LookupObjCProtocolName, Redecl);
return cast_or_null<ObjCProtocolDecl>(D);
}
void Sema::LookupOverloadedOperatorName(OverloadedOperatorKind Op, Scope *S,
UnresolvedSetImpl &Functions) {
// C++ [over.match.oper]p3:
// -- The set of non-member candidates is the result of the
// unqualified lookup of operator@ in the context of the
// expression according to the usual rules for name lookup in
// unqualified function calls (3.4.2) except that all member
// functions are ignored.
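//
// For example (an illustrative sketch): for 'a + b' this collects
//   S operator+(S, S);            // namespace-scope: a candidate
// but not
//   struct T { T operator+(T); }; // member operator: ignored here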
DeclarationName OpName = Context.DeclarationNames.getCXXOperatorName(Op);
LookupResult Operators(*this, OpName, SourceLocation(), LookupOperatorName);
LookupName(Operators, S);
assert(!Operators.isAmbiguous() && "Operator lookup cannot be ambiguous");
Functions.append(Operators.begin(), Operators.end());
}
Sema::SpecialMemberOverloadResult Sema::LookupSpecialMember(CXXRecordDecl *RD,
CXXSpecialMember SM,
bool ConstArg,
bool VolatileArg,
bool RValueThis,
bool ConstThis,
bool VolatileThis) {
assert(CanDeclareSpecialMemberFunction(RD) &&
"doing special member lookup into record that isn't fully complete");
RD = RD->getDefinition();
if (RValueThis || ConstThis || VolatileThis)
assert((SM == CXXCopyAssignment || SM == CXXMoveAssignment) &&
"constructors and destructors always have unqualified lvalue this");
if (ConstArg || VolatileArg)
assert((SM != CXXDefaultConstructor && SM != CXXDestructor) &&
"parameter-less special members can't have qualified arguments");
// FIXME: Get the caller to pass in a location for the lookup.
SourceLocation LookupLoc = RD->getLocation();
llvm::FoldingSetNodeID ID;
ID.AddPointer(RD);
ID.AddInteger(SM);
ID.AddInteger(ConstArg);
ID.AddInteger(VolatileArg);
ID.AddInteger(RValueThis);
ID.AddInteger(ConstThis);
ID.AddInteger(VolatileThis);
void *InsertPoint;
SpecialMemberOverloadResultEntry *Result =
SpecialMemberCache.FindNodeOrInsertPos(ID, InsertPoint);
// This was already cached
if (Result)
return *Result;
Result = BumpAlloc.Allocate<SpecialMemberOverloadResultEntry>();
Result = new (Result) SpecialMemberOverloadResultEntry(ID);
SpecialMemberCache.InsertNode(Result, InsertPoint);
if (SM == CXXDestructor) {
if (RD->needsImplicitDestructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitDestructor(RD);
});
}
CXXDestructorDecl *DD = RD->getDestructor();
Result->setMethod(DD);
Result->setKind(DD && !DD->isDeleted()
? SpecialMemberOverloadResult::Success
: SpecialMemberOverloadResult::NoMemberOrDeleted);
return *Result;
}
// Prepare for overload resolution. Here we construct a synthetic argument
// if necessary and make sure that implicit functions are declared.
CanQualType CanTy = Context.getCanonicalType(Context.getTagDeclType(RD));
DeclarationName Name;
Expr *Arg = nullptr;
unsigned NumArgs;
QualType ArgType = CanTy;
ExprValueKind VK = VK_LValue;
if (SM == CXXDefaultConstructor) {
Name = Context.DeclarationNames.getCXXConstructorName(CanTy);
NumArgs = 0;
if (RD->needsImplicitDefaultConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitDefaultConstructor(RD);
});
}
} else {
if (SM == CXXCopyConstructor || SM == CXXMoveConstructor) {
Name = Context.DeclarationNames.getCXXConstructorName(CanTy);
if (RD->needsImplicitCopyConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitCopyConstructor(RD);
});
}
if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveConstructor()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitMoveConstructor(RD);
});
}
} else {
Name = Context.DeclarationNames.getCXXOperatorName(OO_Equal);
if (RD->needsImplicitCopyAssignment()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitCopyAssignment(RD);
});
}
if (getLangOpts().CPlusPlus11 && RD->needsImplicitMoveAssignment()) {
runWithSufficientStackSpace(RD->getLocation(), [&] {
DeclareImplicitMoveAssignment(RD);
});
}
}
if (ConstArg)
ArgType.addConst();
if (VolatileArg)
ArgType.addVolatile();
// This isn't /really/ specified by the standard, but it's implied that
// we should be working from a PRValue in the case of move to ensure
// that we prefer to bind to rvalue references, and an LValue in the
// case of copy to ensure we don't bind to rvalue references.
// Possibly an XValue is actually correct in the case of move, but
// there is no semantic difference for class types in this restricted
// case.
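// For example (an illustrative sketch): when looking up the move
// constructor of 'struct S { S(S&&); S(const S&); };', classifying the
// synthetic argument as a prvalue lets overload resolution prefer
// S(S&&) over S(const S&).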
if (SM == CXXCopyConstructor || SM == CXXCopyAssignment)
VK = VK_LValue;
else
VK = VK_PRValue;
}
OpaqueValueExpr FakeArg(LookupLoc, ArgType, VK);
if (SM != CXXDefaultConstructor) {
NumArgs = 1;
Arg = &FakeArg;
}
// Create the object argument
QualType ThisTy = CanTy;
if (ConstThis)
ThisTy.addConst();
if (VolatileThis)
ThisTy.addVolatile();
Expr::Classification Classification =
OpaqueValueExpr(LookupLoc, ThisTy, RValueThis ? VK_PRValue : VK_LValue)
.Classify(Context);
// Now we perform lookup on the name we computed earlier and do overload
// resolution. Lookup is only performed directly into the class since there
// will always be a (possibly implicit) declaration to shadow any others.
OverloadCandidateSet OCS(LookupLoc, OverloadCandidateSet::CSK_Normal);
DeclContext::lookup_result R = RD->lookup(Name);
if (R.empty()) {
// We might have no default constructor because we have a lambda's closure
// type, rather than because there's some other declared constructor.
// Every class has a copy/move constructor, copy/move assignment, and
// destructor.
assert(SM == CXXDefaultConstructor &&
"lookup for a constructor or assignment operator was empty");
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
return *Result;
}
// Copy the candidates as our processing of them may load new declarations
// from an external source and invalidate lookup_result.
SmallVector<NamedDecl *, 8> Candidates(R.begin(), R.end());
for (NamedDecl *CandDecl : Candidates) {
if (CandDecl->isInvalidDecl())
continue;
DeclAccessPair Cand = DeclAccessPair::make(CandDecl, AS_public);
auto CtorInfo = getConstructorInfo(Cand);
if (CXXMethodDecl *M = dyn_cast<CXXMethodDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
AddMethodCandidate(M, Cand, RD, ThisTy, Classification,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else if (CtorInfo)
AddOverloadCandidate(CtorInfo.Constructor, CtorInfo.FoundDecl,
llvm::makeArrayRef(&Arg, NumArgs), OCS,
/*SuppressUserConversions*/ true);
else
AddOverloadCandidate(M, Cand, llvm::makeArrayRef(&Arg, NumArgs), OCS,
/*SuppressUserConversions*/ true);
} else if (FunctionTemplateDecl *Tmpl =
dyn_cast<FunctionTemplateDecl>(Cand->getUnderlyingDecl())) {
if (SM == CXXCopyAssignment || SM == CXXMoveAssignment)
AddMethodTemplateCandidate(
Tmpl, Cand, RD, nullptr, ThisTy, Classification,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else if (CtorInfo)
AddTemplateOverloadCandidate(
CtorInfo.ConstructorTmpl, CtorInfo.FoundDecl, nullptr,
llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
else
AddTemplateOverloadCandidate(
Tmpl, Cand, nullptr, llvm::makeArrayRef(&Arg, NumArgs), OCS, true);
} else {
assert(isa<UsingDecl>(Cand.getDecl()) &&
"illegal Kind of operator = Decl");
}
}
OverloadCandidateSet::iterator Best;
switch (OCS.BestViableFunction(*this, LookupLoc, Best)) {
case OR_Success:
Result->setMethod(cast<CXXMethodDecl>(Best->Function));
Result->setKind(SpecialMemberOverloadResult::Success);
break;
case OR_Deleted:
Result->setMethod(cast<CXXMethodDecl>(Best->Function));
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
break;
case OR_Ambiguous:
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::Ambiguous);
break;
case OR_No_Viable_Function:
Result->setMethod(nullptr);
Result->setKind(SpecialMemberOverloadResult::NoMemberOrDeleted);
break;
}
return *Result;
}
/// Look up the default constructor for the given class.
CXXConstructorDecl *Sema::LookupDefaultConstructor(CXXRecordDecl *Class) {
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXDefaultConstructor, false, false, false,
false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
/// Look up the copying constructor for the given class.
CXXConstructorDecl *Sema::LookupCopyingConstructor(CXXRecordDecl *Class,
unsigned Quals) {
assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy ctor arg");
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXCopyConstructor, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
/// Look up the moving constructor for the given class.
CXXConstructorDecl *Sema::LookupMovingConstructor(CXXRecordDecl *Class,
unsigned Quals) {
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXMoveConstructor, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, false, false, false);
return cast_or_null<CXXConstructorDecl>(Result.getMethod());
}
/// Look up the constructors for the given class.
DeclContext::lookup_result Sema::LookupConstructors(CXXRecordDecl *Class) {
// If the implicit constructors have not yet been declared, do so now.
if (CanDeclareSpecialMemberFunction(Class)) {
runWithSufficientStackSpace(Class->getLocation(), [&] {
if (Class->needsImplicitDefaultConstructor())
DeclareImplicitDefaultConstructor(Class);
if (Class->needsImplicitCopyConstructor())
DeclareImplicitCopyConstructor(Class);
if (getLangOpts().CPlusPlus11 && Class->needsImplicitMoveConstructor())
DeclareImplicitMoveConstructor(Class);
});
}
CanQualType T = Context.getCanonicalType(Context.getTypeDeclType(Class));
DeclarationName Name = Context.DeclarationNames.getCXXConstructorName(T);
return Class->lookup(Name);
}
/// Look up the copying assignment operator for the given class.
CXXMethodDecl *Sema::LookupCopyingAssignment(CXXRecordDecl *Class,
unsigned Quals, bool RValueThis,
unsigned ThisQuals) {
assert(!(Quals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment arg");
assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment this");
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXCopyAssignment, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, RValueThis,
ThisQuals & Qualifiers::Const,
ThisQuals & Qualifiers::Volatile);
return Result.getMethod();
}
/// Look up the moving assignment operator for the given class.
CXXMethodDecl *Sema::LookupMovingAssignment(CXXRecordDecl *Class,
unsigned Quals,
bool RValueThis,
unsigned ThisQuals) {
assert(!(ThisQuals & ~(Qualifiers::Const | Qualifiers::Volatile)) &&
"non-const, non-volatile qualifiers for copy assignment this");
SpecialMemberOverloadResult Result =
LookupSpecialMember(Class, CXXMoveAssignment, Quals & Qualifiers::Const,
Quals & Qualifiers::Volatile, RValueThis,
ThisQuals & Qualifiers::Const,
ThisQuals & Qualifiers::Volatile);
return Result.getMethod();
}
/// Look for the destructor of the given class.
///
/// During semantic analysis, this routine should be used in lieu of
/// CXXRecordDecl::getDestructor().
///
/// \returns The destructor for this class.
CXXDestructorDecl *Sema::LookupDestructor(CXXRecordDecl *Class) {
return cast<CXXDestructorDecl>(LookupSpecialMember(Class, CXXDestructor,
false, false, false,
false, false).getMethod());
}
/// LookupLiteralOperator - Determine which literal operator should be used for
/// a user-defined literal, per C++11 [lex.ext].
///
/// Normal overload resolution is not used to select which literal operator to
/// call for a user-defined literal. Look up the provided literal operator name,
/// and filter the results to the appropriate set for the given argument types.
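///
/// For example (an illustrative sketch):
/// @code
/// unsigned long long operator""_w(unsigned long long); // cooked form
/// unsigned long long operator""_w(const char *);       // raw form
/// auto v = 42_w; // the cooked form is preferred for '42', per
///                // C++11 [lex.ext]p3
/// @endcode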
Sema::LiteralOperatorLookupResult
Sema::LookupLiteralOperator(Scope *S, LookupResult &R,
ArrayRef<QualType> ArgTys, bool AllowRaw,
bool AllowTemplate, bool AllowStringTemplatePack,
bool DiagnoseMissing, StringLiteral *StringLit) {
LookupName(R, S);
assert(R.getResultKind() != LookupResult::Ambiguous &&
"literal operator lookup can't be ambiguous");
// Filter the lookup results appropriately.
LookupResult::Filter F = R.makeFilter();
bool AllowCooked = true;
bool FoundRaw = false;
bool FoundTemplate = false;
bool FoundStringTemplatePack = false;
bool FoundCooked = false;
while (F.hasNext()) {
Decl *D = F.next();
if (UsingShadowDecl *USD = dyn_cast<UsingShadowDecl>(D))
D = USD->getTargetDecl();
// If the declaration we found is invalid, skip it.
if (D->isInvalidDecl()) {
F.erase();
continue;
}
bool IsRaw = false;
bool IsTemplate = false;
bool IsStringTemplatePack = false;
bool IsCooked = false;
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D)) {
if (FD->getNumParams() == 1 &&
FD->getParamDecl(0)->getType()->getAs<PointerType>())
IsRaw = true;
else if (FD->getNumParams() == ArgTys.size()) {
IsCooked = true;
for (unsigned ArgIdx = 0; ArgIdx != ArgTys.size(); ++ArgIdx) {
QualType ParamTy = FD->getParamDecl(ArgIdx)->getType();
if (!Context.hasSameUnqualifiedType(ArgTys[ArgIdx], ParamTy)) {
IsCooked = false;
break;
}
}
}
}
if (FunctionTemplateDecl *FD = dyn_cast<FunctionTemplateDecl>(D)) {
TemplateParameterList *Params = FD->getTemplateParameters();
if (Params->size() == 1) {
IsTemplate = true;
if (!Params->getParam(0)->isTemplateParameterPack() && !StringLit) {
// Implied but not stated: user-defined integer and floating literals
// only ever use numeric literal operator templates, not templates
// taking a parameter of class type.
F.erase();
continue;
}
// A string literal template is only considered if the string literal
// is a well-formed template argument for the template parameter.
if (StringLit) {
SFINAETrap Trap(*this);
SmallVector<TemplateArgument, 1> Checked;
TemplateArgumentLoc Arg(TemplateArgument(StringLit), StringLit);
if (CheckTemplateArgument(Params->getParam(0), Arg, FD,
R.getNameLoc(), R.getNameLoc(), 0,
Checked) ||
Trap.hasErrorOccurred())
IsTemplate = false;
}
} else {
IsStringTemplatePack = true;
}
}
if (AllowTemplate && StringLit && IsTemplate) {
FoundTemplate = true;
AllowRaw = false;
AllowCooked = false;
AllowStringTemplatePack = false;
if (FoundRaw || FoundCooked || FoundStringTemplatePack) {
F.restart();
FoundRaw = FoundCooked = FoundStringTemplatePack = false;
}
} else if (AllowCooked && IsCooked) {
FoundCooked = true;
AllowRaw = false;
AllowTemplate = StringLit;
AllowStringTemplatePack = false;
if (FoundRaw || FoundTemplate || FoundStringTemplatePack) {
// Go through again and remove the raw and template decls we've
// already found.
F.restart();
FoundRaw = FoundTemplate = FoundStringTemplatePack = false;
}
} else if (AllowRaw && IsRaw) {
FoundRaw = true;
} else if (AllowTemplate && IsTemplate) {
FoundTemplate = true;
} else if (AllowStringTemplatePack && IsStringTemplatePack) {
FoundStringTemplatePack = true;
} else {
F.erase();
}
}
F.done();
// Per C++20 [lex.ext]p5, we prefer the template form over the non-template
// form for string literal operator templates.
if (StringLit && FoundTemplate)
return LOLR_Template;
// C++11 [lex.ext]p3, p4: If S contains a literal operator with a matching
// parameter type, that is used in preference to a raw literal operator
// or literal operator template.
if (FoundCooked)
return LOLR_Cooked;
// C++11 [lex.ext]p3, p4: S shall contain a raw literal operator or a literal
// operator template, but not both.
if (FoundRaw && FoundTemplate) {
Diag(R.getNameLoc(), diag::err_ovl_ambiguous_call) << R.getLookupName();
for (LookupResult::iterator I = R.begin(), E = R.end(); I != E; ++I)
NoteOverloadCandidate(*I, (*I)->getUnderlyingDecl()->getAsFunction());
return LOLR_Error;
}
if (FoundRaw)
return LOLR_Raw;
if (FoundTemplate)
return LOLR_Template;
if (FoundStringTemplatePack)
return LOLR_StringTemplatePack;
// Didn't find anything we could use.
if (DiagnoseMissing) {
Diag(R.getNameLoc(), diag::err_ovl_no_viable_literal_operator)
<< R.getLookupName() << (int)ArgTys.size() << ArgTys[0]
<< (ArgTys.size() == 2 ? ArgTys[1] : QualType()) << AllowRaw
<< (AllowTemplate || AllowStringTemplatePack);
return LOLR_Error;
}
return LOLR_ErrorNoDiagnostic;
}
void ADLResult::insert(NamedDecl *New) {
NamedDecl *&Old = Decls[cast<NamedDecl>(New->getCanonicalDecl())];
// If we haven't yet seen a decl for this key, or the last decl
// was exactly this one, we're done.
if (Old == nullptr || Old == New) {
Old = New;
return;
}
// Otherwise, decide which is a more recent redeclaration.
FunctionDecl *OldFD = Old->getAsFunction();
FunctionDecl *NewFD = New->getAsFunction();
FunctionDecl *Cursor = NewFD;
while (true) {
Cursor = Cursor->getPreviousDecl();
// If we got to the end without finding OldFD, OldFD is the newer
// declaration; leave things as they are.
if (!Cursor) return;
// If we do find OldFD, then NewFD is newer.
if (Cursor == OldFD) break;
// Otherwise, keep looking.
}
Old = New;
}
void Sema::ArgumentDependentLookup(DeclarationName Name, SourceLocation Loc,
ArrayRef<Expr *> Args, ADLResult &Result) {
// Find all of the associated namespaces and classes based on the
// arguments we have.
AssociatedNamespaceSet AssociatedNamespaces;
AssociatedClassSet AssociatedClasses;
FindAssociatedClassesAndNamespaces(Loc, Args,
AssociatedNamespaces,
AssociatedClasses);
// C++ [basic.lookup.argdep]p3:
// Let X be the lookup set produced by unqualified lookup (3.4.1)
// and let Y be the lookup set produced by argument dependent
// lookup (defined as follows). If X contains [...] then Y is
// empty. Otherwise Y is the set of declarations found in the
// namespaces associated with the argument types as described
// below. The set of declarations found by the lookup of the name
// is the union of X and Y.
//
// Here, we compute Y and add its members to the overloaded
// candidate set.
for (auto *NS : AssociatedNamespaces) {
// When considering an associated namespace, the lookup is the
// same as the lookup performed when the associated namespace is
// used as a qualifier (3.4.3.2) except that:
//
// -- Any using-directives in the associated namespace are
// ignored.
//
// -- Any namespace-scope friend functions declared in
// associated classes are visible within their respective
// namespaces even if they are not visible during an ordinary
// lookup (11.4).
//
// C++20 [basic.lookup.argdep] p4.3
// -- are exported, are attached to a named module M, do not appear
// in the translation unit containing the point of the lookup, and
// have the same innermost enclosing non-inline namespace scope as
// a declaration of an associated entity attached to M.
DeclContext::lookup_result R = NS->lookup(Name);
for (auto *D : R) {
auto *Underlying = D;
if (auto *USD = dyn_cast<UsingShadowDecl>(D))
Underlying = USD->getTargetDecl();
if (!isa<FunctionDecl>(Underlying) &&
!isa<FunctionTemplateDecl>(Underlying))
continue;
// The declaration is visible to argument-dependent lookup if either
// it's ordinarily visible or declared as a friend in an associated
// class.
bool Visible = false;
for (D = D->getMostRecentDecl(); D;
D = cast_or_null<NamedDecl>(D->getPreviousDecl())) {
if (D->getIdentifierNamespace() & Decl::IDNS_Ordinary) {
if (isVisible(D)) {
Visible = true;
break;
} else if (getLangOpts().CPlusPlusModules &&
D->isInExportDeclContext()) {
// C++20 [basic.lookup.argdep] p4.3 .. are exported ...
Module *FM = D->getOwningModule();
// exports are only valid in module purview and outside of any
// PMF (although a PMF should not even be present in a module
// with an import).
assert(FM && FM->isModulePurview() && !FM->isPrivateModule() &&
"bad export context");
// .. are attached to a named module M, do not appear in the
// translation unit containing the point of the lookup..
if (!isModuleUnitOfCurrentTU(FM) &&
llvm::any_of(AssociatedClasses, [&](auto *E) {
// ... and have the same innermost enclosing non-inline
// namespace scope as a declaration of an associated entity
// attached to M
if (!E->hasOwningModule() ||
E->getOwningModule()->getTopLevelModuleName() !=
FM->getTopLevelModuleName())
return false;
// TODO: maybe this could be cached when generating the
// associated namespaces / entities.
DeclContext *Ctx = E->getDeclContext();
while (!Ctx->isFileContext() || Ctx->isInlineNamespace())
Ctx = Ctx->getParent();
return Ctx == NS;
})) {
Visible = true;
break;
}
}
} else if (D->getFriendObjectKind()) {
auto *RD = cast<CXXRecordDecl>(D->getLexicalDeclContext());
// [basic.lookup.argdep]p4:
// Argument-dependent lookup finds all declarations of functions and
// function templates that
// - ...
// - are declared as a friend ([class.friend]) of any class with a
// reachable definition in the set of associated entities,
//
// FIXME: If there's a merged definition of D that is reachable, then
// the friend declaration should be considered.
if (AssociatedClasses.count(RD) && isReachable(D)) {
Visible = true;
break;
}
}
}
// FIXME: Preserve D as the FoundDecl.
if (Visible)
Result.insert(Underlying);
}
}
}
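// Illustrative example of the lookup implemented above (hypothetical code,
// not from the test suite):
//
//   namespace N { struct S {}; void f(S); }
//   void g() { N::S s; f(s); } // the unqualified call finds N::f via ADL
//
// N is an associated namespace of the argument type N::S, so N::f is added
// to the candidate set even though ordinary unqualified lookup from g()
// would not find it.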
//----------------------------------------------------------------------------
// Search for all visible declarations.
//----------------------------------------------------------------------------
VisibleDeclConsumer::~VisibleDeclConsumer() { }
bool VisibleDeclConsumer::includeHiddenDecls() const { return false; }
namespace {
class ShadowContextRAII;
class VisibleDeclsRecord {
public:
/// An entry in the shadow map, which is optimized to store a
/// single declaration (the common case) but can also store a list
/// of declarations.
typedef llvm::TinyPtrVector<NamedDecl*> ShadowMapEntry;
private:
/// A mapping from declaration names to the declarations that have
/// this name within a particular scope.
typedef llvm::DenseMap<DeclarationName, ShadowMapEntry> ShadowMap;
/// A list of shadow maps, which is used to model name hiding.
std::list<ShadowMap> ShadowMaps;
/// The declaration contexts we have already visited.
llvm::SmallPtrSet<DeclContext *, 8> VisitedContexts;
friend class ShadowContextRAII;
public:
/// Determine whether we have already visited this context
/// (and, if not, note that we are going to visit that context now).
bool visitedContext(DeclContext *Ctx) {
return !VisitedContexts.insert(Ctx).second;
}
bool alreadyVisitedContext(DeclContext *Ctx) {
return VisitedContexts.count(Ctx);
}
/// Determine whether the given declaration is hidden in the
/// current scope.
///
/// \returns the declaration that hides the given declaration, or
/// NULL if no such declaration exists.
NamedDecl *checkHidden(NamedDecl *ND);
/// Add a declaration to the current shadow map.
void add(NamedDecl *ND) {
ShadowMaps.back()[ND->getDeclName()].push_back(ND);
}
};
/// RAII object that records when we've entered a shadow context.
class ShadowContextRAII {
VisibleDeclsRecord &Visible;
typedef VisibleDeclsRecord::ShadowMap ShadowMap;
public:
ShadowContextRAII(VisibleDeclsRecord &Visible) : Visible(Visible) {
Visible.ShadowMaps.emplace_back();
}
~ShadowContextRAII() {
Visible.ShadowMaps.pop_back();
}
};
} // end anonymous namespace
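// Typical usage of the machinery above (mirrored by the helpers below):
// each nested lookup context pushes a fresh shadow map for its duration.
//
//   ShadowContextRAII Shadow(Visited);    // push a shadow map
//   lookupInDeclContext(RD, Result, ...); // names found here land in it
//   // the map is popped when Shadow goes out of scope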
NamedDecl *VisibleDeclsRecord::checkHidden(NamedDecl *ND) {
unsigned IDNS = ND->getIdentifierNamespace();
std::list<ShadowMap>::reverse_iterator SM = ShadowMaps.rbegin();
for (std::list<ShadowMap>::reverse_iterator SMEnd = ShadowMaps.rend();
SM != SMEnd; ++SM) {
ShadowMap::iterator Pos = SM->find(ND->getDeclName());
if (Pos == SM->end())
continue;
for (auto *D : Pos->second) {
// A tag declaration does not hide a non-tag declaration.
if (D->hasTagIdentifierNamespace() &&
(IDNS & (Decl::IDNS_Member | Decl::IDNS_Ordinary |
Decl::IDNS_ObjCProtocol)))
continue;
// Protocols are in distinct namespaces from everything else.
if (((D->getIdentifierNamespace() & Decl::IDNS_ObjCProtocol)
|| (IDNS & Decl::IDNS_ObjCProtocol)) &&
D->getIdentifierNamespace() != IDNS)
continue;
// Functions and function templates in the same scope overload
// rather than hide. FIXME: Look for hiding based on function
// signatures!
if (D->getUnderlyingDecl()->isFunctionOrFunctionTemplate() &&
ND->getUnderlyingDecl()->isFunctionOrFunctionTemplate() &&
SM == ShadowMaps.rbegin())
continue;
// A shadow declaration that's created by a resolved using declaration
// is not hidden by the same using declaration.
if (isa<UsingShadowDecl>(ND) && isa<UsingDecl>(D) &&
cast<UsingShadowDecl>(ND)->getIntroducer() == D)
continue;
// We've found a declaration that hides this one.
return D;
}
}
return nullptr;
}
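// Example of the hiding rules above (illustrative only): given
//
//   struct X {}; // tag declaration
//   int X;       // ordinary declaration in an inner scope
//
// the variable hides the tag for ordinary lookup, but a tag never hides an
// ordinary name; checkHidden encodes this asymmetry by consulting the
// identifier namespaces before reporting a hit.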
namespace {
class LookupVisibleHelper {
public:
LookupVisibleHelper(VisibleDeclConsumer &Consumer, bool IncludeDependentBases,
bool LoadExternal)
: Consumer(Consumer), IncludeDependentBases(IncludeDependentBases),
LoadExternal(LoadExternal) {}
void lookupVisibleDecls(Sema &SemaRef, Scope *S, Sema::LookupNameKind Kind,
bool IncludeGlobalScope) {
// Determine the set of using directives available during
// unqualified name lookup.
Scope *Initial = S;
UnqualUsingDirectiveSet UDirs(SemaRef);
if (SemaRef.getLangOpts().CPlusPlus) {
// Find the first namespace or translation-unit scope.
while (S && !isNamespaceOrTranslationUnitScope(S))
S = S->getParent();
UDirs.visitScopeChain(Initial, S);
}
UDirs.done();
// Look for visible declarations.
LookupResult Result(SemaRef, DeclarationName(), SourceLocation(), Kind);
Result.setAllowHidden(Consumer.includeHiddenDecls());
if (!IncludeGlobalScope)
Visited.visitedContext(SemaRef.getASTContext().getTranslationUnitDecl());
ShadowContextRAII Shadow(Visited);
lookupInScope(Initial, Result, UDirs);
}
void lookupVisibleDecls(Sema &SemaRef, DeclContext *Ctx,
Sema::LookupNameKind Kind, bool IncludeGlobalScope) {
LookupResult Result(SemaRef, DeclarationName(), SourceLocation(), Kind);
Result.setAllowHidden(Consumer.includeHiddenDecls());
if (!IncludeGlobalScope)
Visited.visitedContext(SemaRef.getASTContext().getTranslationUnitDecl());
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Ctx, Result, /*QualifiedNameLookup=*/true,
/*InBaseClass=*/false);
}
private:
void lookupInDeclContext(DeclContext *Ctx, LookupResult &Result,
bool QualifiedNameLookup, bool InBaseClass) {
if (!Ctx)
return;
// Make sure we don't visit the same context twice.
if (Visited.visitedContext(Ctx->getPrimaryContext()))
return;
Consumer.EnteredContext(Ctx);
// Outside C++, lookup results for the TU live on identifiers.
if (isa<TranslationUnitDecl>(Ctx) &&
!Result.getSema().getLangOpts().CPlusPlus) {
auto &S = Result.getSema();
auto &Idents = S.Context.Idents;
// Ensure all external identifiers are in the identifier table.
if (LoadExternal)
if (IdentifierInfoLookup *External =
Idents.getExternalIdentifierLookup()) {
std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
for (StringRef Name = Iter->Next(); !Name.empty();
Name = Iter->Next())
Idents.get(Name);
}
// Walk all lookup results in the TU for each identifier.
for (const auto &Ident : Idents) {
for (auto I = S.IdResolver.begin(Ident.getValue()),
E = S.IdResolver.end();
I != E; ++I) {
if (S.IdResolver.isDeclInScope(*I, Ctx)) {
if (NamedDecl *ND = Result.getAcceptableDecl(*I)) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
Visited.add(ND);
}
}
}
}
return;
}
if (CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(Ctx))
Result.getSema().ForceDeclarationOfImplicitMembers(Class);
llvm::SmallVector<NamedDecl *, 4> DeclsToVisit;
// We sometimes skip loading namespace-level results (they tend to be huge).
bool Load = LoadExternal ||
!(isa<TranslationUnitDecl>(Ctx) || isa<NamespaceDecl>(Ctx));
// Enumerate all of the results in this context.
for (DeclContextLookupResult R :
Load ? Ctx->lookups()
: Ctx->noload_lookups(/*PreserveInternalState=*/false)) {
for (auto *D : R) {
if (auto *ND = Result.getAcceptableDecl(D)) {
// Rather than visit immediately, we put ND into a vector and visit
// all decls, in order, outside of this loop. The reason is that
// Consumer.FoundDecl() may invalidate the iterators used in the two
// loops above.
DeclsToVisit.push_back(ND);
}
}
}
for (auto *ND : DeclsToVisit) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), Ctx, InBaseClass);
Visited.add(ND);
}
DeclsToVisit.clear();
// Traverse using directives for qualified name lookup.
if (QualifiedNameLookup) {
ShadowContextRAII Shadow(Visited);
for (auto I : Ctx->using_directives()) {
if (!Result.getSema().isVisible(I))
continue;
lookupInDeclContext(I->getNominatedNamespace(), Result,
QualifiedNameLookup, InBaseClass);
}
}
// Traverse the contexts of inherited C++ classes.
if (CXXRecordDecl *Record = dyn_cast<CXXRecordDecl>(Ctx)) {
if (!Record->hasDefinition())
return;
for (const auto &B : Record->bases()) {
QualType BaseType = B.getType();
RecordDecl *RD;
if (BaseType->isDependentType()) {
if (!IncludeDependentBases) {
// Don't look into dependent bases, because name lookup can't look
// there anyway.
continue;
}
const auto *TST = BaseType->getAs<TemplateSpecializationType>();
if (!TST)
continue;
TemplateName TN = TST->getTemplateName();
const auto *TD =
dyn_cast_or_null<ClassTemplateDecl>(TN.getAsTemplateDecl());
if (!TD)
continue;
RD = TD->getTemplatedDecl();
} else {
const auto *Record = BaseType->getAs<RecordType>();
if (!Record)
continue;
RD = Record->getDecl();
}
// FIXME: It would be nice to be able to determine whether referencing
// a particular member would be ambiguous. For example, given
//
// struct A { int member; };
// struct B { int member; };
// struct C : A, B { };
//
// void f(C *c) { c->### }
//
// accessing 'member' would result in an ambiguity. However, we
// could be smart enough to qualify the member with the base
// class, e.g.,
//
// c->B::member
//
// or
//
// c->A::member
// Find results in this base class (and its bases).
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(RD, Result, QualifiedNameLookup,
/*InBaseClass=*/true);
}
}
// Traverse the contexts of Objective-C classes.
if (ObjCInterfaceDecl *IFace = dyn_cast<ObjCInterfaceDecl>(Ctx)) {
// Traverse categories.
for (auto *Cat : IFace->visible_categories()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Cat, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// Traverse protocols.
for (auto *I : IFace->all_referenced_protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// Traverse the superclass.
if (IFace->getSuperClass()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(IFace->getSuperClass(), Result, QualifiedNameLookup,
/*InBaseClass=*/true);
}
// If there is an implementation, traverse it. We do this to find
// synthesized ivars.
if (IFace->getImplementation()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(IFace->getImplementation(), Result,
QualifiedNameLookup, InBaseClass);
}
} else if (ObjCProtocolDecl *Protocol = dyn_cast<ObjCProtocolDecl>(Ctx)) {
for (auto *I : Protocol->protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
} else if (ObjCCategoryDecl *Category = dyn_cast<ObjCCategoryDecl>(Ctx)) {
for (auto *I : Category->protocols()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(I, Result, QualifiedNameLookup,
/*InBaseClass=*/false);
}
// If there is an implementation, traverse it.
if (Category->getImplementation()) {
ShadowContextRAII Shadow(Visited);
lookupInDeclContext(Category->getImplementation(), Result,
QualifiedNameLookup, /*InBaseClass=*/true);
}
}
}
void lookupInScope(Scope *S, LookupResult &Result,
UnqualUsingDirectiveSet &UDirs) {
// No clients run in this mode and it's not supported. Please add tests and
// remove the assertion if you start relying on it.
assert(!IncludeDependentBases && "Unsupported flag for lookupInScope");
if (!S)
return;
if (!S->getEntity() ||
(!S->getParent() && !Visited.alreadyVisitedContext(S->getEntity())) ||
(S->getEntity())->isFunctionOrMethod()) {
FindLocalExternScope FindLocals(Result);
// Walk through the declarations in this Scope. The consumer might add new
// decls to the scope as part of deserialization, so make a copy first.
SmallVector<Decl *, 8> ScopeDecls(S->decls().begin(), S->decls().end());
for (Decl *D : ScopeDecls) {
if (NamedDecl *ND = dyn_cast<NamedDecl>(D))
if ((ND = Result.getAcceptableDecl(ND))) {
Consumer.FoundDecl(ND, Visited.checkHidden(ND), nullptr, false);
Visited.add(ND);
}
}
}
DeclContext *Entity = S->getLookupEntity();
if (Entity) {
// Look into this scope's declaration context, along with any of its
// parent lookup contexts (e.g., enclosing classes), up to the point
// where we hit the context stored in the next outer scope.
DeclContext *OuterCtx = findOuterContext(S);
for (DeclContext *Ctx = Entity; Ctx && !Ctx->Equals(OuterCtx);
Ctx = Ctx->getLookupParent()) {
if (ObjCMethodDecl *Method = dyn_cast<ObjCMethodDecl>(Ctx)) {
if (Method->isInstanceMethod()) {
// For instance methods, look for ivars in the method's interface.
LookupResult IvarResult(Result.getSema(), Result.getLookupName(),
Result.getNameLoc(),
Sema::LookupMemberName);
if (ObjCInterfaceDecl *IFace = Method->getClassInterface()) {
lookupInDeclContext(IFace, IvarResult,
/*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
}
// We've already performed all of the name lookup that we need
// to for Objective-C methods; the next context will be the
// outer scope.
break;
}
if (Ctx->isFunctionOrMethod())
continue;
lookupInDeclContext(Ctx, Result, /*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
} else if (!S->getParent()) {
// Look into the translation unit scope. We walk through the translation
// unit's declaration context, because the Scope itself won't have all of
// the declarations if we loaded a precompiled header.
// FIXME: We would like the translation unit's Scope object to point to
// the translation unit, so we don't need this special "if" branch.
// However, doing so would force the normal C++ name-lookup code to look
// into the translation unit decl when the IdentifierInfo chains would
// suffice. Once we fix that problem (which is part of a more general
// "don't look in DeclContexts unless we have to" optimization), we can
// eliminate this.
Entity = Result.getSema().Context.getTranslationUnitDecl();
lookupInDeclContext(Entity, Result, /*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
if (Entity) {
// Lookup visible declarations in any namespaces found by using
// directives.
for (const UnqualUsingEntry &UUE : UDirs.getNamespacesFor(Entity))
lookupInDeclContext(
const_cast<DeclContext *>(UUE.getNominatedNamespace()), Result,
/*QualifiedNameLookup=*/false,
/*InBaseClass=*/false);
}
// Lookup names in the parent scope.
ShadowContextRAII Shadow(Visited);
lookupInScope(S->getParent(), Result, UDirs);
}
private:
VisibleDeclsRecord Visited;
VisibleDeclConsumer &Consumer;
bool IncludeDependentBases;
bool LoadExternal;
};
} // namespace
void Sema::LookupVisibleDecls(Scope *S, LookupNameKind Kind,
VisibleDeclConsumer &Consumer,
bool IncludeGlobalScope, bool LoadExternal) {
LookupVisibleHelper H(Consumer, /*IncludeDependentBases=*/false,
LoadExternal);
H.lookupVisibleDecls(*this, S, Kind, IncludeGlobalScope);
}
void Sema::LookupVisibleDecls(DeclContext *Ctx, LookupNameKind Kind,
VisibleDeclConsumer &Consumer,
bool IncludeGlobalScope,
bool IncludeDependentBases, bool LoadExternal) {
LookupVisibleHelper H(Consumer, IncludeDependentBases, LoadExternal);
H.lookupVisibleDecls(*this, Ctx, Kind, IncludeGlobalScope);
}
/// LookupOrCreateLabel - Do a name lookup of a label with the specified name.
/// If GnuLabelLoc is a valid source location, then this is a definition
/// of an __label__ label name, otherwise it is a normal label definition
/// or use.
LabelDecl *Sema::LookupOrCreateLabel(IdentifierInfo *II, SourceLocation Loc,
SourceLocation GnuLabelLoc) {
// Do a lookup to see if we have a label with this name already.
NamedDecl *Res = nullptr;
if (GnuLabelLoc.isValid()) {
// Local label definitions always shadow existing labels.
Res = LabelDecl::Create(Context, CurContext, Loc, II, GnuLabelLoc);
Scope *S = CurScope;
PushOnScopeChains(Res, S, true);
return cast<LabelDecl>(Res);
}
// Not a GNU local label.
Res = LookupSingleName(CurScope, II, Loc, LookupLabel, NotForRedeclaration);
// If we found a label, check to see if it is in the same context as us.
// When in a Block, we don't want to reuse a label in an enclosing function.
if (Res && Res->getDeclContext() != CurContext)
Res = nullptr;
if (!Res) {
// If not forward referenced or defined already, create the backing decl.
Res = LabelDecl::Create(Context, CurContext, Loc, II);
Scope *S = CurScope->getFnParent();
assert(S && "Not in a function?");
PushOnScopeChains(Res, S, true);
}
return cast<LabelDecl>(Res);
}
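// Illustrative use of the GNU local-label path above (hypothetical code):
//
//   void f() {
//     __label__ retry; // GnuLabelLoc is valid: always a fresh label
//     retry: if (again()) goto retry;
//   }
//
// The __label__ declaration shadows any 'retry' label in an enclosing
// scope, which is why the code above creates the LabelDecl unconditionally.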
//===----------------------------------------------------------------------===//
// Typo correction
//===----------------------------------------------------------------------===//
static bool isCandidateViable(CorrectionCandidateCallback &CCC,
TypoCorrection &Candidate) {
Candidate.setCallbackDistance(CCC.RankCandidate(Candidate));
return Candidate.getEditDistance(false) != TypoCorrection::InvalidDistance;
}
static void LookupPotentialTypoResult(Sema &SemaRef,
LookupResult &Res,
IdentifierInfo *Name,
Scope *S, CXXScopeSpec *SS,
DeclContext *MemberContext,
bool EnteringContext,
bool isObjCIvarLookup,
bool FindHidden);
/// Check whether the declarations found for a typo correction are
/// visible. Set the correction's RequiresImport flag to true if none of the
/// declarations are visible, false otherwise.
static void checkCorrectionVisibility(Sema &SemaRef, TypoCorrection &TC) {
TypoCorrection::decl_iterator DI = TC.begin(), DE = TC.end();
for (/**/; DI != DE; ++DI)
if (!LookupResult::isVisible(SemaRef, *DI))
break;
// No filtering needed if all decls are visible.
if (DI == DE) {
TC.setRequiresImport(false);
return;
}
llvm::SmallVector<NamedDecl*, 4> NewDecls(TC.begin(), DI);
bool AnyVisibleDecls = !NewDecls.empty();
for (/**/; DI != DE; ++DI) {
if (LookupResult::isVisible(SemaRef, *DI)) {
if (!AnyVisibleDecls) {
// Found a visible decl, discard all hidden ones.
AnyVisibleDecls = true;
NewDecls.clear();
}
NewDecls.push_back(*DI);
} else if (!AnyVisibleDecls && !(*DI)->isModulePrivate())
NewDecls.push_back(*DI);
}
if (NewDecls.empty())
TC = TypoCorrection();
else {
TC.setCorrectionDecls(NewDecls);
TC.setRequiresImport(!AnyVisibleDecls);
}
}
// Fill the supplied vector with the IdentifierInfo pointers for each piece of
// the given NestedNameSpecifier (i.e. given a NestedNameSpecifier "foo::bar::",
// fill the vector with the IdentifierInfo pointers for "foo" and "bar").
static void getNestedNameSpecifierIdentifiers(
NestedNameSpecifier *NNS,
SmallVectorImpl<const IdentifierInfo*> &Identifiers) {
if (NestedNameSpecifier *Prefix = NNS->getPrefix())
getNestedNameSpecifierIdentifiers(Prefix, Identifiers);
else
Identifiers.clear();
const IdentifierInfo *II = nullptr;
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
II = NNS->getAsIdentifier();
break;
case NestedNameSpecifier::Namespace:
if (NNS->getAsNamespace()->isAnonymousNamespace())
return;
II = NNS->getAsNamespace()->getIdentifier();
break;
case NestedNameSpecifier::NamespaceAlias:
II = NNS->getAsNamespaceAlias()->getIdentifier();
break;
case NestedNameSpecifier::TypeSpecWithTemplate:
case NestedNameSpecifier::TypeSpec:
II = QualType(NNS->getAsType(), 0).getBaseTypeIdentifier();
break;
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
return;
}
if (II)
Identifiers.push_back(II);
}
void TypoCorrectionConsumer::FoundDecl(NamedDecl *ND, NamedDecl *Hiding,
DeclContext *Ctx, bool InBaseClass) {
// Don't consider hidden names for typo correction.
if (Hiding)
return;
// Only consider entities with identifiers for names, ignoring
// special names (constructors, overloaded operators, selectors,
// etc.).
IdentifierInfo *Name = ND->getIdentifier();
if (!Name)
return;
// Only consider visible declarations and declarations from modules with
// names that exactly match.
if (!LookupResult::isVisible(SemaRef, ND) && Name != Typo)
return;
FoundName(Name->getName());
}
void TypoCorrectionConsumer::FoundName(StringRef Name) {
// Compute the edit distance between the typo and the name of this
// entity, and add the identifier to the list of results.
addName(Name, nullptr);
}
void TypoCorrectionConsumer::addKeywordResult(StringRef Keyword) {
// Compute the edit distance between the typo and this keyword,
// and add the keyword to the list of results.
addName(Keyword, nullptr, nullptr, true);
}
void TypoCorrectionConsumer::addName(StringRef Name, NamedDecl *ND,
NestedNameSpecifier *NNS, bool isKeyword) {
// Use a simple length-based heuristic to determine the minimum possible
// edit distance. If the minimum isn't good enough, bail out early.
StringRef TypoStr = Typo->getName();
unsigned MinED = abs((int)Name.size() - (int)TypoStr.size());
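// Worked example: for the typo "strat" (length 5) and the candidate
// "strategies" (length 10), MinED is 5 and 5 / 5 == 1 < 3, so the check
// below rejects the candidate before any real edit-distance computation.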
if (MinED && TypoStr.size() / MinED < 3)
return;
// Compute an upper bound on the allowable edit distance, so that the
// edit-distance algorithm can short-circuit.
unsigned UpperBound = (TypoStr.size() + 2) / 3;
unsigned ED = TypoStr.edit_distance(Name, true, UpperBound);
if (ED > UpperBound) return;
TypoCorrection TC(&SemaRef.Context.Idents.get(Name), ND, NNS, ED);
if (isKeyword) TC.makeKeyword();
TC.setCorrectionRange(nullptr, Result.getLookupNameInfo());
addCorrection(TC);
}
static const unsigned MaxTypoDistanceResultSets = 5;
void TypoCorrectionConsumer::addCorrection(TypoCorrection Correction) {
StringRef TypoStr = Typo->getName();
StringRef Name = Correction.getCorrectionAsIdentifierInfo()->getName();
// For very short typos, ignore potential corrections that have a different
// base identifier from the typo or which have a normalized edit distance
// longer than the typo itself.
if (TypoStr.size() < 3 &&
(Name != TypoStr || Correction.getEditDistance(true) > TypoStr.size()))
return;
// If the correction is resolved but is not viable, ignore it.
if (Correction.isResolved()) {
checkCorrectionVisibility(SemaRef, Correction);
if (!Correction || !isCandidateViable(*CorrectionValidator, Correction))
return;
}
TypoResultList &CList =
CorrectionResults[Correction.getEditDistance(false)][Name];
if (!CList.empty() && !CList.back().isResolved())
CList.pop_back();
if (NamedDecl *NewND = Correction.getCorrectionDecl()) {
auto RI = llvm::find_if(CList, [NewND](const TypoCorrection &TypoCorr) {
return TypoCorr.getCorrectionDecl() == NewND;
});
if (RI != CList.end()) {
// The Correction refers to a decl already in the list. No insertion is
// necessary and all further cases will return.
auto IsDeprecated = [](Decl *D) {
while (D) {
if (D->isDeprecated())
return true;
D = llvm::dyn_cast_or_null<NamespaceDecl>(D->getDeclContext());
}
return false;
};
// Prefer non-deprecated corrections over deprecated ones, and only then
// order alphabetically.
std::pair<bool, std::string> NewKey = {
IsDeprecated(Correction.getFoundDecl()),
Correction.getAsString(SemaRef.getLangOpts())};
std::pair<bool, std::string> PrevKey = {
IsDeprecated(RI->getFoundDecl()),
RI->getAsString(SemaRef.getLangOpts())};
if (NewKey < PrevKey)
*RI = Correction;
return;
}
}
if (CList.empty() || Correction.isResolved())
CList.push_back(Correction);
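// Keep only the MaxTypoDistanceResultSets best edit-distance buckets;
// CorrectionResults is keyed by edit distance, so erasing from the back
// discards the worst candidates first.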
while (CorrectionResults.size() > MaxTypoDistanceResultSets)
CorrectionResults.erase(std::prev(CorrectionResults.end()));
}
void TypoCorrectionConsumer::addNamespaces(
const llvm::MapVector<NamespaceDecl *, bool> &KnownNamespaces) {
SearchNamespaces = true;
for (auto KNPair : KnownNamespaces)
Namespaces.addNameSpecifier(KNPair.first);
bool SSIsTemplate = false;
if (NestedNameSpecifier *NNS =
(SS && SS->isValid()) ? SS->getScopeRep() : nullptr) {
if (const Type *T = NNS->getAsType())
SSIsTemplate = T->getTypeClass() == Type::TemplateSpecialization;
}
// Do not transform this into an iterator-based loop. The loop body can
// trigger the creation of further types (through lazy deserialization) and
// invalidate iterators into this list.
auto &Types = SemaRef.getASTContext().getTypes();
for (unsigned I = 0; I != Types.size(); ++I) {
const auto *TI = Types[I];
if (CXXRecordDecl *CD = TI->getAsCXXRecordDecl()) {
CD = CD->getCanonicalDecl();
if (!CD->isDependentType() && !CD->isAnonymousStructOrUnion() &&
!CD->isUnion() && CD->getIdentifier() &&
(SSIsTemplate || !isa<ClassTemplateSpecializationDecl>(CD)) &&
(CD->isBeingDefined() || CD->isCompleteDefinition()))
Namespaces.addNameSpecifier(CD);
}
}
}
const TypoCorrection &TypoCorrectionConsumer::getNextCorrection() {
if (++CurrentTCIndex < ValidatedCorrections.size())
return ValidatedCorrections[CurrentTCIndex];
CurrentTCIndex = ValidatedCorrections.size();
while (!CorrectionResults.empty()) {
auto DI = CorrectionResults.begin();
if (DI->second.empty()) {
CorrectionResults.erase(DI);
continue;
}
auto RI = DI->second.begin();
if (RI->second.empty()) {
DI->second.erase(RI);
performQualifiedLookups();
continue;
}
TypoCorrection TC = RI->second.pop_back_val();
if (TC.isResolved() || TC.requiresImport() || resolveCorrection(TC)) {
ValidatedCorrections.push_back(TC);
return ValidatedCorrections[CurrentTCIndex];
}
}
return ValidatedCorrections[0]; // The empty correction.
}
bool TypoCorrectionConsumer::resolveCorrection(TypoCorrection &Candidate) {
IdentifierInfo *Name = Candidate.getCorrectionAsIdentifierInfo();
DeclContext *TempMemberContext = MemberContext;
CXXScopeSpec *TempSS = SS.get();
retry_lookup:
LookupPotentialTypoResult(SemaRef, Result, Name, S, TempSS, TempMemberContext,
EnteringContext,
CorrectionValidator->IsObjCIvarLookup,
Name == Typo && !Candidate.WillReplaceSpecifier());
switch (Result.getResultKind()) {
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::FoundUnresolvedValue:
if (TempSS) {
// Immediately retry the lookup without the given CXXScopeSpec
TempSS = nullptr;
Candidate.WillReplaceSpecifier(true);
goto retry_lookup;
}
if (TempMemberContext) {
if (SS && !TempSS)
TempSS = SS.get();
TempMemberContext = nullptr;
goto retry_lookup;
}
if (SearchNamespaces)
QualifiedResults.push_back(Candidate);
break;
case LookupResult::Ambiguous:
// We don't deal with ambiguities.
break;
case LookupResult::Found:
case LookupResult::FoundOverloaded:
// Store all of the Decls for overloaded symbols
for (auto *TRD : Result)
Candidate.addCorrectionDecl(TRD);
checkCorrectionVisibility(SemaRef, Candidate);
if (!isCandidateViable(*CorrectionValidator, Candidate)) {
if (SearchNamespaces)
QualifiedResults.push_back(Candidate);
break;
}
Candidate.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
return true;
}
return false;
}
void TypoCorrectionConsumer::performQualifiedLookups() {
unsigned TypoLen = Typo->getName().size();
for (const TypoCorrection &QR : QualifiedResults) {
for (const auto &NSI : Namespaces) {
DeclContext *Ctx = NSI.DeclCtx;
const Type *NSType = NSI.NameSpecifier->getAsType();
// If the current NestedNameSpecifier refers to a class and the
// current correction candidate is the name of that class, then skip
// it, as it is unlikely that a qualified version of the class's
// constructor is an appropriate correction.
if (CXXRecordDecl *NSDecl = NSType ? NSType->getAsCXXRecordDecl() :
nullptr) {
if (NSDecl->getIdentifier() == QR.getCorrectionAsIdentifierInfo())
continue;
}
TypoCorrection TC(QR);
TC.ClearCorrectionDecls();
TC.setCorrectionSpecifier(NSI.NameSpecifier);
TC.setQualifierDistance(NSI.EditDistance);
TC.setCallbackDistance(0); // Reset the callback distance
// If the current correction candidate and namespace combination are
// too far away from the original typo based on the normalized edit
// distance, then skip performing a qualified name lookup.
unsigned TmpED = TC.getEditDistance(true);
if (QR.getCorrectionAsIdentifierInfo() != Typo && TmpED &&
TypoLen / TmpED < 3)
continue;
Result.clear();
Result.setLookupName(QR.getCorrectionAsIdentifierInfo());
if (!SemaRef.LookupQualifiedName(Result, Ctx))
continue;
// Any corrections added below will be validated in subsequent
// iterations of the main while() loop over the Consumer's contents.
switch (Result.getResultKind()) {
case LookupResult::Found:
case LookupResult::FoundOverloaded: {
if (SS && SS->isValid()) {
std::string NewQualified = TC.getAsString(SemaRef.getLangOpts());
std::string OldQualified;
llvm::raw_string_ostream OldOStream(OldQualified);
SS->getScopeRep()->print(OldOStream, SemaRef.getPrintingPolicy());
OldOStream << Typo->getName();
// If the correction candidate would be written as an identical qualified
// identifier, then the existing CXXScopeSpec probably included a typedef
// that wasn't accounted for properly.
if (OldOStream.str() == NewQualified)
break;
}
for (LookupResult::iterator TRD = Result.begin(), TRDEnd = Result.end();
TRD != TRDEnd; ++TRD) {
if (SemaRef.CheckMemberAccess(TC.getCorrectionRange().getBegin(),
NSType ? NSType->getAsCXXRecordDecl()
: nullptr,
TRD.getPair()) == Sema::AR_accessible)
TC.addCorrectionDecl(*TRD);
}
if (TC.isResolved()) {
TC.setCorrectionRange(SS.get(), Result.getLookupNameInfo());
addCorrection(TC);
}
break;
}
case LookupResult::NotFound:
case LookupResult::NotFoundInCurrentInstantiation:
case LookupResult::Ambiguous:
case LookupResult::FoundUnresolvedValue:
break;
}
}
}
QualifiedResults.clear();
}
TypoCorrectionConsumer::NamespaceSpecifierSet::NamespaceSpecifierSet(
ASTContext &Context, DeclContext *CurContext, CXXScopeSpec *CurScopeSpec)
: Context(Context), CurContextChain(buildContextChain(CurContext)) {
if (NestedNameSpecifier *NNS =
CurScopeSpec ? CurScopeSpec->getScopeRep() : nullptr) {
llvm::raw_string_ostream SpecifierOStream(CurNameSpecifier);
NNS->print(SpecifierOStream, Context.getPrintingPolicy());
getNestedNameSpecifierIdentifiers(NNS, CurNameSpecifierIdentifiers);
}
// Build the list of identifiers that would be used for an absolute
// (from the global context) NestedNameSpecifier referring to the current
// context.
for (DeclContext *C : llvm::reverse(CurContextChain)) {
if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C))
CurContextIdentifiers.push_back(ND->getIdentifier());
}
// Add the global context as a NestedNameSpecifier
SpecifierInfo SI = {cast<DeclContext>(Context.getTranslationUnitDecl()),
NestedNameSpecifier::GlobalSpecifier(Context), 1};
DistanceMap[1].push_back(SI);
}
auto TypoCorrectionConsumer::NamespaceSpecifierSet::buildContextChain(
DeclContext *Start) -> DeclContextList {
assert(Start && "Building a context chain from a null context");
DeclContextList Chain;
for (DeclContext *DC = Start->getPrimaryContext(); DC != nullptr;
DC = DC->getLookupParent()) {
NamespaceDecl *ND = dyn_cast_or_null<NamespaceDecl>(DC);
if (!DC->isInlineNamespace() && !DC->isTransparentContext() &&
!(ND && ND->isAnonymousNamespace()))
Chain.push_back(DC->getPrimaryContext());
}
return Chain;
}
unsigned
TypoCorrectionConsumer::NamespaceSpecifierSet::buildNestedNameSpecifier(
DeclContextList &DeclChain, NestedNameSpecifier *&NNS) {
unsigned NumSpecifiers = 0;
for (DeclContext *C : llvm::reverse(DeclChain)) {
if (auto *ND = dyn_cast_or_null<NamespaceDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, ND);
++NumSpecifiers;
} else if (auto *RD = dyn_cast_or_null<RecordDecl>(C)) {
NNS = NestedNameSpecifier::Create(Context, NNS, RD->isTemplateDecl(),
RD->getTypeForDecl());
++NumSpecifiers;
}
}
return NumSpecifiers;
}
void TypoCorrectionConsumer::NamespaceSpecifierSet::addNameSpecifier(
DeclContext *Ctx) {
NestedNameSpecifier *NNS = nullptr;
unsigned NumSpecifiers = 0;
DeclContextList NamespaceDeclChain(buildContextChain(Ctx));
DeclContextList FullNamespaceDeclChain(NamespaceDeclChain);
// Eliminate common elements from the two DeclContext chains.
for (DeclContext *C : llvm::reverse(CurContextChain)) {
if (NamespaceDeclChain.empty() || NamespaceDeclChain.back() != C)
break;
NamespaceDeclChain.pop_back();
}
// Build the NestedNameSpecifier from what is left of the NamespaceDeclChain
NumSpecifiers = buildNestedNameSpecifier(NamespaceDeclChain, NNS);
// Add an explicit leading '::' specifier if needed.
if (NamespaceDeclChain.empty()) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
buildNestedNameSpecifier(FullNamespaceDeclChain, NNS);
} else if (NamedDecl *ND =
dyn_cast_or_null<NamedDecl>(NamespaceDeclChain.back())) {
IdentifierInfo *Name = ND->getIdentifier();
bool SameNameSpecifier = false;
if (llvm::is_contained(CurNameSpecifierIdentifiers, Name)) {
std::string NewNameSpecifier;
llvm::raw_string_ostream SpecifierOStream(NewNameSpecifier);
SmallVector<const IdentifierInfo *, 4> NewNameSpecifierIdentifiers;
getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers);
NNS->print(SpecifierOStream, Context.getPrintingPolicy());
SpecifierOStream.flush();
SameNameSpecifier = NewNameSpecifier == CurNameSpecifier;
}
if (SameNameSpecifier || llvm::is_contained(CurContextIdentifiers, Name)) {
// Rebuild the NestedNameSpecifier as a globally-qualified specifier.
NNS = NestedNameSpecifier::GlobalSpecifier(Context);
NumSpecifiers =
buildNestedNameSpecifier(FullNamespaceDeclChain, NNS);
}
}
// If the built NestedNameSpecifier would be replacing an existing
// NestedNameSpecifier, use the number of component identifiers that
// would need to be changed as the edit distance instead of the number
// of components in the built NestedNameSpecifier.
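// E.g. (illustrative): replacing the written specifier "foo::bar::" with
// "foo::baz::" costs 1 (the one changed component) rather than 2 (the
// component count of the new specifier).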
if (NNS && !CurNameSpecifierIdentifiers.empty()) {
SmallVector<const IdentifierInfo*, 4> NewNameSpecifierIdentifiers;
getNestedNameSpecifierIdentifiers(NNS, NewNameSpecifierIdentifiers);
NumSpecifiers = llvm::ComputeEditDistance(
llvm::makeArrayRef(CurNameSpecifierIdentifiers),
llvm::makeArrayRef(NewNameSpecifierIdentifiers));
}
SpecifierInfo SI = {Ctx, NNS, NumSpecifiers};
DistanceMap[NumSpecifiers].push_back(SI);
}
/// Perform name lookup for a possible result for typo correction.
static void LookupPotentialTypoResult(Sema &SemaRef,
LookupResult &Res,
IdentifierInfo *Name,
Scope *S, CXXScopeSpec *SS,
DeclContext *MemberContext,
bool EnteringContext,
bool isObjCIvarLookup,
bool FindHidden) {
Res.suppressDiagnostics();
Res.clear();
Res.setLookupName(Name);
Res.setAllowHidden(FindHidden);
if (MemberContext) {
if (ObjCInterfaceDecl *Class = dyn_cast<ObjCInterfaceDecl>(MemberContext)) {
if (isObjCIvarLookup) {
if (ObjCIvarDecl *Ivar = Class->lookupInstanceVariable(Name)) {
Res.addDecl(Ivar);
Res.resolveKind();
return;
}
}
if (ObjCPropertyDecl *Prop = Class->FindPropertyDeclaration(
Name, ObjCPropertyQueryKind::OBJC_PR_query_instance)) {
Res.addDecl(Prop);
Res.resolveKind();
return;
}
}
SemaRef.LookupQualifiedName(Res, MemberContext);
return;
}
SemaRef.LookupParsedName(Res, S, SS, /*AllowBuiltinCreation=*/false,
EnteringContext);
// Fake ivar lookup; this should really be part of
// LookupParsedName.
if (ObjCMethodDecl *Method = SemaRef.getCurMethodDecl()) {
if (Method->isInstanceMethod() && Method->getClassInterface() &&
(Res.empty() ||
(Res.isSingleResult() &&
Res.getFoundDecl()->isDefinedOutsideFunctionOrMethod()))) {
if (ObjCIvarDecl *IV
= Method->getClassInterface()->lookupInstanceVariable(Name)) {
Res.addDecl(IV);
Res.resolveKind();
}
}
}
}
/// Add keywords to the consumer as possible typo corrections.
static void AddKeywordsToConsumer(Sema &SemaRef,
TypoCorrectionConsumer &Consumer,
Scope *S, CorrectionCandidateCallback &CCC,
bool AfterNestedNameSpecifier) {
if (AfterNestedNameSpecifier) {
// For 'X::', we know exactly which keywords can appear next.
Consumer.addKeywordResult("template");
if (CCC.WantExpressionKeywords)
Consumer.addKeywordResult("operator");
return;
}
if (CCC.WantObjCSuper)
Consumer.addKeywordResult("super");
if (CCC.WantTypeSpecifiers) {
// Add type-specifier keywords to the set of results.
static const char *const CTypeSpecs[] = {
"char", "const", "double", "enum", "float", "int", "long", "short",
"signed", "struct", "union", "unsigned", "void", "volatile",
"_Complex", "_Imaginary",
// storage-specifiers as well
"extern", "inline", "static", "typedef"
};
const unsigned NumCTypeSpecs = llvm::array_lengthof(CTypeSpecs);
for (unsigned I = 0; I != NumCTypeSpecs; ++I)
Consumer.addKeywordResult(CTypeSpecs[I]);
if (SemaRef.getLangOpts().C99)
Consumer.addKeywordResult("restrict");
if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus)
Consumer.addKeywordResult("bool");
else if (SemaRef.getLangOpts().C99)
Consumer.addKeywordResult("_Bool");
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("class");
Consumer.addKeywordResult("typename");
Consumer.addKeywordResult("wchar_t");
if (SemaRef.getLangOpts().CPlusPlus11) {
Consumer.addKeywordResult("char16_t");
Consumer.addKeywordResult("char32_t");
Consumer.addKeywordResult("constexpr");
Consumer.addKeywordResult("decltype");
Consumer.addKeywordResult("thread_local");
}
}
if (SemaRef.getLangOpts().GNUKeywords)
Consumer.addKeywordResult("typeof");
} else if (CCC.WantFunctionLikeCasts) {
static const char *const CastableTypeSpecs[] = {
"char", "double", "float", "int", "long", "short",
"signed", "unsigned", "void"
};
for (auto *kw : CastableTypeSpecs)
Consumer.addKeywordResult(kw);
}
if (CCC.WantCXXNamedCasts && SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("const_cast");
Consumer.addKeywordResult("dynamic_cast");
Consumer.addKeywordResult("reinterpret_cast");
Consumer.addKeywordResult("static_cast");
}
if (CCC.WantExpressionKeywords) {
Consumer.addKeywordResult("sizeof");
if (SemaRef.getLangOpts().Bool || SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("false");
Consumer.addKeywordResult("true");
}
if (SemaRef.getLangOpts().CPlusPlus) {
static const char *const CXXExprs[] = {
"delete", "new", "operator", "throw", "typeid"
};
const unsigned NumCXXExprs = llvm::array_lengthof(CXXExprs);
for (unsigned I = 0; I != NumCXXExprs; ++I)
Consumer.addKeywordResult(CXXExprs[I]);
if (isa<CXXMethodDecl>(SemaRef.CurContext) &&
cast<CXXMethodDecl>(SemaRef.CurContext)->isInstance())
Consumer.addKeywordResult("this");
if (SemaRef.getLangOpts().CPlusPlus11) {
Consumer.addKeywordResult("alignof");
Consumer.addKeywordResult("nullptr");
}
}
if (SemaRef.getLangOpts().C11) {
// FIXME: We should not suggest _Alignof if the alignof macro
// is present.
Consumer.addKeywordResult("_Alignof");
}
}
if (CCC.WantRemainingKeywords) {
if (SemaRef.getCurFunctionOrMethodDecl() || SemaRef.getCurBlock()) {
// Statements.
static const char *const CStmts[] = {
"do", "else", "for", "goto", "if", "return", "switch", "while" };
const unsigned NumCStmts = llvm::array_lengthof(CStmts);
for (unsigned I = 0; I != NumCStmts; ++I)
Consumer.addKeywordResult(CStmts[I]);
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("catch");
Consumer.addKeywordResult("try");
}
if (S && S->getBreakParent())
Consumer.addKeywordResult("break");
if (S && S->getContinueParent())
Consumer.addKeywordResult("continue");
if (SemaRef.getCurFunction() &&
!SemaRef.getCurFunction()->SwitchStack.empty()) {
Consumer.addKeywordResult("case");
Consumer.addKeywordResult("default");
}
} else {
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("namespace");
Consumer.addKeywordResult("template");
}
if (S && S->isClassScope()) {
Consumer.addKeywordResult("explicit");
Consumer.addKeywordResult("friend");
Consumer.addKeywordResult("mutable");
Consumer.addKeywordResult("private");
Consumer.addKeywordResult("protected");
Consumer.addKeywordResult("public");
Consumer.addKeywordResult("virtual");
}
}
if (SemaRef.getLangOpts().CPlusPlus) {
Consumer.addKeywordResult("using");
if (SemaRef.getLangOpts().CPlusPlus11)
Consumer.addKeywordResult("static_assert");
}
}
}
std::unique_ptr<TypoCorrectionConsumer> Sema::makeTypoCorrectionConsumer(
const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS, CorrectionCandidateCallback &CCC,
DeclContext *MemberContext, bool EnteringContext,
const ObjCObjectPointerType *OPT, bool ErrorRecovery) {
if (Diags.hasFatalErrorOccurred() || !getLangOpts().SpellChecking ||
DisableTypoCorrection)
return nullptr;
// In Microsoft mode, don't perform typo correction in a template member
// function dependent context because it interferes with the "lookup into
// dependent bases of class templates" feature.
if (getLangOpts().MSVCCompat && CurContext->isDependentContext() &&
isa<CXXMethodDecl>(CurContext))
return nullptr;
// We only attempt to correct typos for identifiers.
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
if (!Typo)
return nullptr;
// If the scope specifier itself was invalid, don't try to correct
// typos.
if (SS && SS->isInvalid())
return nullptr;
// Never try to correct typos during any kind of code synthesis.
if (!CodeSynthesisContexts.empty())
return nullptr;
// Don't try to correct 'super'.
if (S && S->isInObjcMethodScope() && Typo == getSuperIdentifier())
return nullptr;
// Abort if typo correction already failed for this specific typo.
IdentifierSourceLocations::iterator locs = TypoCorrectionFailures.find(Typo);
if (locs != TypoCorrectionFailures.end() &&
locs->second.count(TypoName.getLoc()))
return nullptr;
// Don't try to correct the identifier "vector" when in AltiVec mode.
// TODO: Figure out why typo correction misbehaves in this case, fix it, and
// remove this workaround.
if ((getLangOpts().AltiVec || getLangOpts().ZVector) && Typo->isStr("vector"))
return nullptr;
// Provide a stopgap for files that are seriously broken: trying to correct
// every typo can impose a huge performance penalty, causing some files to
// take minutes to be rejected by the parser.
unsigned Limit = getDiagnostics().getDiagnosticOptions().SpellCheckingLimit;
if (Limit && TyposCorrected >= Limit)
return nullptr;
++TyposCorrected;
// If we're handling a missing symbol error, using modules, and the
// special search all modules option is used, look for a missing import.
if (ErrorRecovery && getLangOpts().Modules &&
getLangOpts().ModulesSearchAll) {
// The following has the side effect of loading the missing module.
getModuleLoader().lookupMissingImports(Typo->getName(),
TypoName.getBeginLoc());
}
// Extend the lifetime of the callback. We delayed this until here
// to avoid allocations in the hot path (which is where no typo correction
// occurs). Note that CorrectionCandidateCallback is polymorphic and
// initially stack-allocated.
std::unique_ptr<CorrectionCandidateCallback> ClonedCCC = CCC.clone();
auto Consumer = std::make_unique<TypoCorrectionConsumer>(
*this, TypoName, LookupKind, S, SS, std::move(ClonedCCC), MemberContext,
EnteringContext);
// Perform name lookup to find visible, similarly-named entities.
bool IsUnqualifiedLookup = false;
DeclContext *QualifiedDC = MemberContext;
if (MemberContext) {
LookupVisibleDecls(MemberContext, LookupKind, *Consumer);
// Look in qualified interfaces.
if (OPT) {
for (auto *I : OPT->quals())
LookupVisibleDecls(I, LookupKind, *Consumer);
}
} else if (SS && SS->isSet()) {
QualifiedDC = computeDeclContext(*SS, EnteringContext);
if (!QualifiedDC)
return nullptr;
LookupVisibleDecls(QualifiedDC, LookupKind, *Consumer);
} else {
IsUnqualifiedLookup = true;
}
// Determine whether we are going to search in the various namespaces for
// corrections.
bool SearchNamespaces
= getLangOpts().CPlusPlus &&
(IsUnqualifiedLookup || (SS && SS->isSet()));
if (IsUnqualifiedLookup || SearchNamespaces) {
// For unqualified lookup, look through all of the names that we have
// seen in this translation unit.
// FIXME: Re-add the ability to skip very unlikely potential corrections.
for (const auto &I : Context.Idents)
Consumer->FoundName(I.getKey());
// Walk through identifiers in external identifier sources.
// FIXME: Re-add the ability to skip very unlikely potential corrections.
if (IdentifierInfoLookup *External
= Context.Idents.getExternalIdentifierLookup()) {
std::unique_ptr<IdentifierIterator> Iter(External->getIdentifiers());
do {
StringRef Name = Iter->Next();
if (Name.empty())
break;
Consumer->FoundName(Name);
} while (true);
}
}
AddKeywordsToConsumer(*this, *Consumer, S,
*Consumer->getCorrectionValidator(),
SS && SS->isNotEmpty());
// Build the NestedNameSpecifiers for the KnownNamespaces, if we're going
// to search those namespaces.
if (SearchNamespaces) {
// Load any externally-known namespaces.
if (ExternalSource && !LoadedExternalKnownNamespaces) {
SmallVector<NamespaceDecl *, 4> ExternalKnownNamespaces;
LoadedExternalKnownNamespaces = true;
ExternalSource->ReadKnownNamespaces(ExternalKnownNamespaces);
for (auto *N : ExternalKnownNamespaces)
KnownNamespaces[N] = true;
}
Consumer->addNamespaces(KnownNamespaces);
}
return Consumer;
}
/// Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
///
/// \param TypoName the \c DeclarationNameInfo structure that contains
/// the name that was present in the source code along with its location.
///
/// \param LookupKind the name-lookup criteria used to search for the name.
///
/// \param S the scope in which name lookup occurs.
///
/// \param SS the nested-name-specifier that precedes the name we're
/// looking for, if present.
///
/// \param CCC A CorrectionCandidateCallback object that provides further
/// validation of typo correction candidates. It also provides flags for
/// determining the set of keywords permitted.
///
/// \param MemberContext if non-NULL, the context in which to look for
/// a member access expression.
///
/// \param EnteringContext whether we're entering the context described by
/// the nested-name-specifier SS.
///
/// \param OPT when non-NULL, the search for visible declarations will
/// also walk the protocols in the qualified interfaces of \p OPT.
///
/// \returns a \c TypoCorrection containing the corrected name for the typo,
/// along with information such as the \c NamedDecl where the corrected name
/// was declared, and any additional \c NestedNameSpecifier needed to access
/// it (C++ only). The \c TypoCorrection is empty if there is no correction.
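///
/// For instance (purely illustrative), a use of the undeclared name
/// 'pritnf' may be corrected to a visible 'printf' declaration with an
/// edit distance of 1, provided \p CCC accepts function candidates.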
TypoCorrection Sema::CorrectTypo(const DeclarationNameInfo &TypoName,
Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS,
CorrectionCandidateCallback &CCC,
CorrectTypoKind Mode,
DeclContext *MemberContext,
bool EnteringContext,
const ObjCObjectPointerType *OPT,
bool RecordFailure) {
// Always let the ExternalSource have the first chance at correction, even
// if we would otherwise have given up.
if (ExternalSource) {
if (TypoCorrection Correction =
ExternalSource->CorrectTypo(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext, OPT))
return Correction;
}
// Ugly hack equivalent to CTC == CTC_ObjCMessageReceiver;
// WantObjCSuper is only true for CTC_ObjCMessageReceiver and for
// some instances of CTC_Unknown, while WantRemainingKeywords is true
// for CTC_Unknown but not for CTC_ObjCMessageReceiver.
bool ObjCMessageReceiver = CCC.WantObjCSuper && !CCC.WantRemainingKeywords;
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
auto Consumer = makeTypoCorrectionConsumer(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext,
OPT, Mode == CTK_ErrorRecovery);
if (!Consumer)
return TypoCorrection();
// If we haven't found anything, we're done.
if (Consumer->empty())
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
// Make sure the best edit distance (prior to adding any namespace qualifiers)
// is not more than about a third of the length of the typo's identifier.
unsigned ED = Consumer->getBestEditDistance(true);
unsigned TypoLen = Typo->getName().size();
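// Worked numbers: for a 6-character typo, a best edit distance of 3 is
// rejected below (6 / 3 == 2 < 3), while a distance of 2 survives
// (6 / 2 == 3).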
if (ED > 0 && TypoLen / ED < 3)
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
TypoCorrection BestTC = Consumer->getNextCorrection();
TypoCorrection SecondBestTC = Consumer->getNextCorrection();
if (!BestTC)
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
ED = BestTC.getEditDistance();
if (TypoLen >= 3 && ED > 0 && TypoLen / ED < 3) {
// If this was an unqualified lookup and we believe the callback
// object wouldn't have filtered out possible corrections, note
// that no correction was found.
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
}
// If only a single name remains, return that result.
if (!SecondBestTC ||
SecondBestTC.getEditDistance(false) > BestTC.getEditDistance(false)) {
const TypoCorrection &Result = BestTC;
// Don't correct to a keyword that's the same as the typo; the keyword
// wasn't actually in scope.
if (ED == 0 && Result.isKeyword())
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
TypoCorrection TC = Result;
TC.setCorrectionRange(SS, TypoName);
checkCorrectionVisibility(*this, TC);
return TC;
} else if (SecondBestTC && ObjCMessageReceiver) {
// Prefer 'super' when we're completing in a message-receiver
// context.
if (BestTC.getCorrection().getAsString() != "super") {
if (SecondBestTC.getCorrection().getAsString() == "super")
BestTC = SecondBestTC;
else if ((*Consumer)["super"].front().isKeyword())
BestTC = (*Consumer)["super"].front();
}
// Don't correct to a keyword that's the same as the typo; the keyword
// wasn't actually in scope.
if (BestTC.getEditDistance() == 0 ||
BestTC.getCorrection().getAsString() != "super")
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure);
BestTC.setCorrectionRange(SS, TypoName);
return BestTC;
}
// Record the failure's location if needed and return an empty correction. If
// this was an unqualified lookup and we believe the callback object did not
// filter out possible corrections, also cache the failure for the typo.
return FailedCorrection(Typo, TypoName.getLoc(), RecordFailure && !SecondBestTC);
}
/// Try to "correct" a typo in the source code by finding
/// visible declarations whose names are similar to the name that was
/// present in the source code.
///
/// \param TypoName the \c DeclarationNameInfo structure that contains
/// the name that was present in the source code along with its location.
///
/// \param LookupKind the name-lookup criteria used to search for the name.
///
/// \param S the scope in which name lookup occurs.
///
/// \param SS the nested-name-specifier that precedes the name we're
/// looking for, if present.
///
/// \param CCC A CorrectionCandidateCallback object that provides further
/// validation of typo correction candidates. It also provides flags for
/// determining the set of keywords permitted.
///
/// \param TDG A TypoDiagnosticGenerator functor that will be used to print
/// diagnostics when the actual typo correction is attempted.
///
/// \param TRC A TypoRecoveryCallback functor that will be used to build an
/// Expr from a typo correction candidate.
///
/// \param MemberContext if non-NULL, the context in which to look for
/// a member access expression.
///
/// \param EnteringContext whether we're entering the context described by
/// the nested-name-specifier SS.
///
/// \param OPT when non-NULL, the search for visible declarations will
/// also walk the protocols in the qualified interfaces of \p OPT.
///
/// \returns a new \c TypoExpr that will later be replaced in the AST with an
/// Expr representing the result of performing typo correction, or nullptr if
/// typo correction is not possible. If nullptr is returned, no diagnostics will
/// be emitted and it is the responsibility of the caller to emit any that are
/// needed.
TypoExpr *Sema::CorrectTypoDelayed(
const DeclarationNameInfo &TypoName, Sema::LookupNameKind LookupKind,
Scope *S, CXXScopeSpec *SS, CorrectionCandidateCallback &CCC,
TypoDiagnosticGenerator TDG, TypoRecoveryCallback TRC, CorrectTypoKind Mode,
DeclContext *MemberContext, bool EnteringContext,
const ObjCObjectPointerType *OPT) {
auto Consumer = makeTypoCorrectionConsumer(TypoName, LookupKind, S, SS, CCC,
MemberContext, EnteringContext,
OPT, Mode == CTK_ErrorRecovery);
// Give the external sema source a chance to correct the typo.
TypoCorrection ExternalTypo;
if (ExternalSource && Consumer) {
ExternalTypo = ExternalSource->CorrectTypo(
TypoName, LookupKind, S, SS, *Consumer->getCorrectionValidator(),
MemberContext, EnteringContext, OPT);
if (ExternalTypo)
Consumer->addCorrection(ExternalTypo);
}
if (!Consumer || Consumer->empty())
return nullptr;
// Make sure the best edit distance (prior to adding any namespace qualifiers)
// is not more than about a third of the length of the typo's identifier.
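// (Illustrative arithmetic: with the integer division below, a 9-character
// identifier tolerates an edit distance of up to 3, while a 5-character
// identifier only tolerates 1.)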
unsigned ED = Consumer->getBestEditDistance(true);
IdentifierInfo *Typo = TypoName.getName().getAsIdentifierInfo();
if (!ExternalTypo && ED > 0 && Typo->getName().size() / ED < 3)
return nullptr;
ExprEvalContexts.back().NumTypos++;
return createDelayedTypo(std::move(Consumer), std::move(TDG), std::move(TRC),
TypoName.getLoc());
}
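// Illustrative sketch, not part of this change: a caller typically supplies
// the diagnostic and recovery callbacks as lambdas matching the
// TypoDiagnosticGenerator and TypoRecoveryCallback typedefs, relying on the
// defaulted trailing parameters. SemaRef, NameInfo, S, SS and CCC below are
// hypothetical placeholders.
//
//   TypoExpr *TE = SemaRef.CorrectTypoDelayed(
//       NameInfo, Sema::LookupOrdinaryName, S, &SS, CCC,
//       [](const TypoCorrection &TC) { /* emit the delayed diagnostic */ },
//       [](Sema &S, TypoExpr *TE, TypoCorrection TC) -> ExprResult {
//         /* rebuild the expression from the chosen correction */
//         return ExprError();
//       },
//       Sema::CTK_ErrorRecovery);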
void TypoCorrection::addCorrectionDecl(NamedDecl *CDecl) {
if (!CDecl) return;
if (isKeyword())
CorrectionDecls.clear();
CorrectionDecls.push_back(CDecl);
if (!CorrectionName)
CorrectionName = CDecl->getDeclName();
}
std::string TypoCorrection::getAsString(const LangOptions &LO) const {
if (CorrectionNameSpec) {
std::string tmpBuffer;
llvm::raw_string_ostream PrefixOStream(tmpBuffer);
CorrectionNameSpec->print(PrefixOStream, PrintingPolicy(LO));
PrefixOStream << CorrectionName;
return PrefixOStream.str();
}
return CorrectionName.getAsString();
}
bool CorrectionCandidateCallback::ValidateCandidate(
const TypoCorrection &candidate) {
if (!candidate.isResolved())
return true;
if (candidate.isKeyword())
return WantTypeSpecifiers || WantExpressionKeywords || WantCXXNamedCasts ||
WantRemainingKeywords || WantObjCSuper;
bool HasNonType = false;
bool HasStaticMethod = false;
bool HasNonStaticMethod = false;
for (Decl *D : candidate) {
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(D))
D = FTD->getTemplatedDecl();
if (CXXMethodDecl *Method = dyn_cast<CXXMethodDecl>(D)) {
if (Method->isStatic())
HasStaticMethod = true;
else
HasNonStaticMethod = true;
}
if (!isa<TypeDecl>(D))
HasNonType = true;
}
if (IsAddressOfOperand && HasNonStaticMethod && !HasStaticMethod &&
!candidate.getCorrectionSpecifier())
return false;
return WantTypeSpecifiers || HasNonType;
}
FunctionCallFilterCCC::FunctionCallFilterCCC(Sema &SemaRef, unsigned NumArgs,
bool HasExplicitTemplateArgs,
MemberExpr *ME)
: NumArgs(NumArgs), HasExplicitTemplateArgs(HasExplicitTemplateArgs),
CurContext(SemaRef.CurContext), MemberFn(ME) {
WantTypeSpecifiers = false;
WantFunctionLikeCasts = SemaRef.getLangOpts().CPlusPlus &&
!HasExplicitTemplateArgs && NumArgs == 1;
WantCXXNamedCasts = HasExplicitTemplateArgs && NumArgs == 1;
WantRemainingKeywords = false;
}
bool FunctionCallFilterCCC::ValidateCandidate(const TypoCorrection &candidate) {
if (!candidate.getCorrectionDecl())
return candidate.isKeyword();
for (auto *C : candidate) {
FunctionDecl *FD = nullptr;
NamedDecl *ND = C->getUnderlyingDecl();
if (FunctionTemplateDecl *FTD = dyn_cast<FunctionTemplateDecl>(ND))
FD = FTD->getTemplatedDecl();
if (!HasExplicitTemplateArgs && !FD) {
if (!(FD = dyn_cast<FunctionDecl>(ND)) && isa<ValueDecl>(ND)) {
// If the Decl is neither a function nor a template function,
// determine if it is a pointer or reference to a function. If so,
// check against the number of arguments expected for the pointee.
QualType ValType = cast<ValueDecl>(ND)->getType();
if (ValType.isNull())
continue;
if (ValType->isAnyPointerType() || ValType->isReferenceType())
ValType = ValType->getPointeeType();
if (const FunctionProtoType *FPT = ValType->getAs<FunctionProtoType>())
if (FPT->getNumParams() == NumArgs)
return true;
}
}
// A typo for a function-style cast can look like a function call in C++.
if ((HasExplicitTemplateArgs ? getAsTypeTemplateDecl(ND) != nullptr
: isa<TypeDecl>(ND)) &&
CurContext->getParentASTContext().getLangOpts().CPlusPlus)
// Only a class or class template can take two or more arguments.
return NumArgs <= 1 || HasExplicitTemplateArgs || isa<CXXRecordDecl>(ND);
// Skip the current candidate if it is not a FunctionDecl or does not accept
// the current number of arguments.
if (!FD || !(FD->getNumParams() >= NumArgs &&
FD->getMinRequiredArguments() <= NumArgs))
continue;
// If the current candidate is a non-static C++ method, skip the candidate
// unless the method being corrected--or the current DeclContext, if the
// function being corrected is not a method--is a method in the same class
// or a descendant class of the candidate's parent class.
if (CXXMethodDecl *MD = dyn_cast<CXXMethodDecl>(FD)) {
if (MemberFn || !MD->isStatic()) {
CXXMethodDecl *CurMD =
MemberFn
? dyn_cast_or_null<CXXMethodDecl>(MemberFn->getMemberDecl())
: dyn_cast_or_null<CXXMethodDecl>(CurContext);
CXXRecordDecl *CurRD =
CurMD ? CurMD->getParent()->getCanonicalDecl() : nullptr;
CXXRecordDecl *RD = MD->getParent()->getCanonicalDecl();
if (!CurRD || (CurRD != RD && !CurRD->isDerivedFrom(RD)))
continue;
}
}
return true;
}
return false;
}
void Sema::diagnoseTypo(const TypoCorrection &Correction,
const PartialDiagnostic &TypoDiag,
bool ErrorRecovery) {
diagnoseTypo(Correction, TypoDiag, PDiag(diag::note_previous_decl),
ErrorRecovery);
}
/// Find which declaration we should import to provide the definition of
/// the given declaration.
static NamedDecl *getDefinitionToImport(NamedDecl *D) {
if (VarDecl *VD = dyn_cast<VarDecl>(D))
return VD->getDefinition();
if (FunctionDecl *FD = dyn_cast<FunctionDecl>(D))
return FD->getDefinition();
if (TagDecl *TD = dyn_cast<TagDecl>(D))
return TD->getDefinition();
if (ObjCInterfaceDecl *ID = dyn_cast<ObjCInterfaceDecl>(D))
return ID->getDefinition();
if (ObjCProtocolDecl *PD = dyn_cast<ObjCProtocolDecl>(D))
return PD->getDefinition();
if (TemplateDecl *TD = dyn_cast<TemplateDecl>(D))
if (NamedDecl *TTD = TD->getTemplatedDecl())
return getDefinitionToImport(TTD);
return nullptr;
}
void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl,
MissingImportKind MIK, bool Recover) {
// Suggest importing a module providing the definition of this entity, if
// possible.
NamedDecl *Def = getDefinitionToImport(Decl);
if (!Def)
Def = Decl;
Module *Owner = getOwningModule(Def);
assert(Owner && "definition of hidden declaration is not in a module");
llvm::SmallVector<Module*, 8> OwningModules;
OwningModules.push_back(Owner);
auto Merged = Context.getModulesWithMergedDefinition(Def);
OwningModules.insert(OwningModules.end(), Merged.begin(), Merged.end());
diagnoseMissingImport(Loc, Def, Def->getLocation(), OwningModules, MIK,
Recover);
}
/// Get a "quoted.h" or <angled.h> include path to use in a diagnostic
/// suggesting the addition of a #include of the specified file.
static std::string getHeaderNameForHeader(Preprocessor &PP, const FileEntry *E,
llvm::StringRef IncludingFile) {
bool IsSystem = false;
auto Path = PP.getHeaderSearchInfo().suggestPathToFileForDiagnostics(
E, IncludingFile, &IsSystem);
return (IsSystem ? '<' : '"') + Path + (IsSystem ? '>' : '"');
}
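// Illustrative: a header reached through a system include path comes back in
// angle brackets (e.g. <vector>), while one reached relative to the including
// file comes back quoted (e.g. "foo.h").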
void Sema::diagnoseMissingImport(SourceLocation UseLoc, NamedDecl *Decl,
SourceLocation DeclLoc,
ArrayRef<Module *> Modules,
MissingImportKind MIK, bool Recover) {
assert(!Modules.empty());
auto NotePrevious = [&] {
// FIXME: Suppress the note backtrace even under
// -fdiagnostics-show-note-include-stack. We don't care how this
// declaration was previously reached.
Diag(DeclLoc, diag::note_unreachable_entity) << (int)MIK;
};
// Weed out duplicates from module list.
llvm::SmallVector<Module*, 8> UniqueModules;
llvm::SmallDenseSet<Module*, 8> UniqueModuleSet;
for (auto *M : Modules) {
if (M->Kind == Module::GlobalModuleFragment)
continue;
if (UniqueModuleSet.insert(M).second)
UniqueModules.push_back(M);
}
// Try to find a suitable header-name to #include.
std::string HeaderName;
if (const FileEntry *Header =
PP.getHeaderToIncludeForDiagnostics(UseLoc, DeclLoc)) {
if (const FileEntry *FE =
SourceMgr.getFileEntryForID(SourceMgr.getFileID(UseLoc)))
HeaderName = getHeaderNameForHeader(PP, Header, FE->tryGetRealPathName());
}
// If we have a #include we should suggest, or if all definition locations
// were in global module fragments, don't suggest an import.
if (!HeaderName.empty() || UniqueModules.empty()) {
// FIXME: Find a smart place to suggest inserting a #include, and add
// a FixItHint there.
Diag(UseLoc, diag::err_module_unimported_use_header)
<< (int)MIK << Decl << !HeaderName.empty() << HeaderName;
// Produce a note showing where the entity was declared.
NotePrevious();
if (Recover)
createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
return;
}
Modules = UniqueModules;
if (Modules.size() > 1) {
std::string ModuleList;
unsigned N = 0;
for (Module *M : Modules) {
ModuleList += "\n ";
if (++N == 5 && N != Modules.size()) {
ModuleList += "[...]";
break;
}
ModuleList += M->getFullModuleName();
}
Diag(UseLoc, diag::err_module_unimported_use_multiple)
<< (int)MIK << Decl << ModuleList;
} else {
// FIXME: Add a FixItHint that imports the corresponding module.
Diag(UseLoc, diag::err_module_unimported_use)
<< (int)MIK << Decl << Modules[0]->getFullModuleName();
}
NotePrevious();
// Try to recover by implicitly importing this module.
if (Recover)
createImplicitModuleImportForErrorRecovery(UseLoc, Modules[0]);
}
/// Diagnose a successfully-corrected typo. Separated from the correction
/// itself to allow external validation of the result, etc.
///
/// \param Correction The result of performing typo correction.
/// \param TypoDiag The diagnostic to produce. This will have the corrected
/// string added to it (and usually also a fixit).
/// \param PrevNote A note to use when indicating the location of the entity to
/// which we are correcting. Will have the correction string added to it.
/// \param ErrorRecovery If \c true (the default), the caller is going to
/// recover from the typo as if the corrected string had been typed.
/// In this case, \c PDiag must be an error, and we will attach a fixit
/// to it.
void Sema::diagnoseTypo(const TypoCorrection &Correction,
const PartialDiagnostic &TypoDiag,
const PartialDiagnostic &PrevNote,
bool ErrorRecovery) {
std::string CorrectedStr = Correction.getAsString(getLangOpts());
std::string CorrectedQuotedStr = Correction.getQuoted(getLangOpts());
FixItHint FixTypo = FixItHint::CreateReplacement(
Correction.getCorrectionRange(), CorrectedStr);
// Maybe we're just missing a module import.
if (Correction.requiresImport()) {
NamedDecl *Decl = Correction.getFoundDecl();
assert(Decl && "import required but no declaration to import");
diagnoseMissingImport(Correction.getCorrectionRange().getBegin(), Decl,
MissingImportKind::Declaration, ErrorRecovery);
return;
}
Diag(Correction.getCorrectionRange().getBegin(), TypoDiag)
<< CorrectedQuotedStr << (ErrorRecovery ? FixTypo : FixItHint());
NamedDecl *ChosenDecl =
Correction.isKeyword() ? nullptr : Correction.getFoundDecl();
if (PrevNote.getDiagID() && ChosenDecl)
Diag(ChosenDecl->getLocation(), PrevNote)
<< CorrectedQuotedStr << (ErrorRecovery ? FixItHint() : FixTypo);
// Add any extra diagnostics.
for (const PartialDiagnostic &PD : Correction.getExtraDiagnostics())
Diag(Correction.getCorrectionRange().getBegin(), PD);
}
TypoExpr *Sema::createDelayedTypo(std::unique_ptr<TypoCorrectionConsumer> TCC,
TypoDiagnosticGenerator TDG,
TypoRecoveryCallback TRC,
SourceLocation TypoLoc) {
assert(TCC && "createDelayedTypo requires a valid TypoCorrectionConsumer");
auto TE = new (Context) TypoExpr(Context.DependentTy, TypoLoc);
auto &State = DelayedTypos[TE];
State.Consumer = std::move(TCC);
State.DiagHandler = std::move(TDG);
State.RecoveryHandler = std::move(TRC);
if (TE)
TypoExprs.push_back(TE);
return TE;
}
const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const {
auto Entry = DelayedTypos.find(TE);
assert(Entry != DelayedTypos.end() &&
"Failed to get the state for a TypoExpr!");
return Entry->second;
}
void Sema::clearDelayedTypo(TypoExpr *TE) {
DelayedTypos.erase(TE);
}
void Sema::ActOnPragmaDump(Scope *S, SourceLocation IILoc, IdentifierInfo *II) {
DeclarationNameInfo Name(II, IILoc);
LookupResult R(*this, Name, LookupAnyName, Sema::NotForRedeclaration);
R.suppressDiagnostics();
R.setHideTags(false);
LookupName(R, S);
R.dump();
}
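// Illustrative note (the pragma spelling is an assumption on my part): this
// entry point services a debug dump pragma naming an identifier, e.g.
//
//   #pragma clang __debug dump SomeName
//
// which looks SomeName up with LookupAnyName and dumps the lookup result.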
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 3ab5d26a9a75..edcac4d2ee9a 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -1,9281 +1,9284 @@
//===--- SemaType.cpp - Semantic Analysis for Types -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements type-related semantic analysis.
//
//===----------------------------------------------------------------------===//
#include "TypeLocBuilder.h"
#include "clang/AST/ASTConsumer.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTMutationListener.h"
#include "clang/AST/ASTStructuralEquivalence.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Expr.h"
#include "clang/AST/TypeLoc.h"
#include "clang/AST/TypeLocVisitor.h"
#include "clang/Basic/PartialDiagnostic.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/DeclSpec.h"
#include "clang/Sema/DelayedDiagnostic.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/ParsedTemplate.h"
#include "clang/Sema/ScopeInfo.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "clang/Sema/TemplateInstCallback.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <bitset>
using namespace clang;
enum TypeDiagSelector {
TDS_Function,
TDS_Pointer,
TDS_ObjCObjOrBlock
};
/// isOmittedBlockReturnType - Return true if this declarator is missing a
/// return type because this is an omitted return type on a block literal.
static bool isOmittedBlockReturnType(const Declarator &D) {
if (D.getContext() != DeclaratorContext::BlockLiteral ||
D.getDeclSpec().hasTypeSpecifier())
return false;
if (D.getNumTypeObjects() == 0)
return true; // ^{ ... }
if (D.getNumTypeObjects() == 1 &&
D.getTypeObject(0).Kind == DeclaratorChunk::Function)
return true; // ^(int X, float Y) { ... }
return false;
}
/// diagnoseBadTypeAttribute - Diagnoses a type attribute which
/// doesn't apply to the given type.
static void diagnoseBadTypeAttribute(Sema &S, const ParsedAttr &attr,
QualType type) {
TypeDiagSelector WhichType;
bool useExpansionLoc = true;
switch (attr.getKind()) {
case ParsedAttr::AT_ObjCGC:
WhichType = TDS_Pointer;
break;
case ParsedAttr::AT_ObjCOwnership:
WhichType = TDS_ObjCObjOrBlock;
break;
default:
// Assume everything else was a function attribute.
WhichType = TDS_Function;
useExpansionLoc = false;
break;
}
SourceLocation loc = attr.getLoc();
StringRef name = attr.getAttrName()->getName();
// The GC attributes are usually written with macros; special-case them.
IdentifierInfo *II = attr.isArgIdent(0) ? attr.getArgAsIdent(0)->Ident
: nullptr;
if (useExpansionLoc && loc.isMacroID() && II) {
if (II->isStr("strong")) {
if (S.findMacroSpelling(loc, "__strong")) name = "__strong";
} else if (II->isStr("weak")) {
if (S.findMacroSpelling(loc, "__weak")) name = "__weak";
}
}
S.Diag(loc, diag::warn_type_attribute_wrong_type) << name << WhichType
<< type;
}
// objc_gc applies to Objective-C pointers or, otherwise, to the
// smallest available pointer type (i.e. 'void*' in 'void**').
#define OBJC_POINTER_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_ObjCGC: \
case ParsedAttr::AT_ObjCOwnership
// Calling convention attributes.
#define CALLING_CONV_ATTRS_CASELIST \
case ParsedAttr::AT_CDecl: \
case ParsedAttr::AT_FastCall: \
case ParsedAttr::AT_StdCall: \
case ParsedAttr::AT_ThisCall: \
case ParsedAttr::AT_RegCall: \
case ParsedAttr::AT_Pascal: \
case ParsedAttr::AT_SwiftCall: \
case ParsedAttr::AT_SwiftAsyncCall: \
case ParsedAttr::AT_VectorCall: \
case ParsedAttr::AT_AArch64VectorPcs: \
case ParsedAttr::AT_AArch64SVEPcs: \
case ParsedAttr::AT_AMDGPUKernelCall: \
case ParsedAttr::AT_MSABI: \
case ParsedAttr::AT_SysVABI: \
case ParsedAttr::AT_Pcs: \
case ParsedAttr::AT_IntelOclBicc: \
case ParsedAttr::AT_PreserveMost: \
case ParsedAttr::AT_PreserveAll
// Function type attributes.
#define FUNCTION_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_NSReturnsRetained: \
case ParsedAttr::AT_NoReturn: \
case ParsedAttr::AT_Regparm: \
case ParsedAttr::AT_CmseNSCall: \
case ParsedAttr::AT_AnyX86NoCallerSavedRegisters: \
case ParsedAttr::AT_AnyX86NoCfCheck: \
CALLING_CONV_ATTRS_CASELIST
// Microsoft-specific type qualifiers.
#define MS_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_Ptr32: \
case ParsedAttr::AT_Ptr64: \
case ParsedAttr::AT_SPtr: \
case ParsedAttr::AT_UPtr
// Nullability qualifiers.
#define NULLABILITY_TYPE_ATTRS_CASELIST \
case ParsedAttr::AT_TypeNonNull: \
case ParsedAttr::AT_TypeNullable: \
case ParsedAttr::AT_TypeNullableResult: \
case ParsedAttr::AT_TypeNullUnspecified
namespace {
/// An object which stores processing state for the entire
/// GetTypeForDeclarator process.
class TypeProcessingState {
Sema &sema;
/// The declarator being processed.
Declarator &declarator;
/// The index of the declarator chunk we're currently processing.
/// May be the total number of valid chunks, indicating the
/// DeclSpec.
unsigned chunkIndex;
/// The original set of attributes on the DeclSpec.
SmallVector<ParsedAttr *, 2> savedAttrs;
/// A list of attributes whose uselessness we will diagnose once
/// processing is complete.
SmallVector<ParsedAttr *, 2> ignoredTypeAttrs;
/// Attributes corresponding to AttributedTypeLocs that we have not yet
/// populated.
// FIXME: The two-phase mechanism by which we construct Types and fill
// their TypeLocs makes it hard to correctly assign these. We keep the
// attributes in creation order as an attempt to make them line up
// properly.
using TypeAttrPair = std::pair<const AttributedType*, const Attr*>;
SmallVector<TypeAttrPair, 8> AttrsForTypes;
bool AttrsForTypesSorted = true;
/// MacroQualifiedTypes mapping to macro expansion locations that will be
/// stored in a MacroQualifiedTypeLoc.
llvm::DenseMap<const MacroQualifiedType *, SourceLocation> LocsForMacros;
/// Flag to indicate we parsed a noderef attribute. This is used for
/// validating that noderef was used on a pointer or array.
bool parsedNoDeref;
public:
TypeProcessingState(Sema &sema, Declarator &declarator)
: sema(sema), declarator(declarator),
chunkIndex(declarator.getNumTypeObjects()), parsedNoDeref(false) {}
Sema &getSema() const {
return sema;
}
Declarator &getDeclarator() const {
return declarator;
}
bool isProcessingDeclSpec() const {
return chunkIndex == declarator.getNumTypeObjects();
}
unsigned getCurrentChunkIndex() const {
return chunkIndex;
}
void setCurrentChunkIndex(unsigned idx) {
assert(idx <= declarator.getNumTypeObjects());
chunkIndex = idx;
}
ParsedAttributesView &getCurrentAttributes() const {
if (isProcessingDeclSpec())
return getMutableDeclSpec().getAttributes();
return declarator.getTypeObject(chunkIndex).getAttrs();
}
/// Save the current set of attributes on the DeclSpec.
void saveDeclSpecAttrs() {
// Don't try to save them multiple times.
if (!savedAttrs.empty())
return;
DeclSpec &spec = getMutableDeclSpec();
llvm::append_range(savedAttrs,
llvm::make_pointer_range(spec.getAttributes()));
}
/// Record that we had nowhere to put the given type attribute.
/// We will diagnose such attributes later.
void addIgnoredTypeAttr(ParsedAttr &attr) {
ignoredTypeAttrs.push_back(&attr);
}
/// Diagnose all the ignored type attributes, given that the
/// declarator worked out to the given type.
void diagnoseIgnoredTypeAttrs(QualType type) const {
for (auto *Attr : ignoredTypeAttrs)
diagnoseBadTypeAttribute(getSema(), *Attr, type);
}
/// Get an attributed type for the given attribute, and remember the Attr
/// object so that we can attach it to the AttributedTypeLoc.
QualType getAttributedType(Attr *A, QualType ModifiedType,
QualType EquivType) {
QualType T =
sema.Context.getAttributedType(A->getKind(), ModifiedType, EquivType);
AttrsForTypes.push_back({cast<AttributedType>(T.getTypePtr()), A});
AttrsForTypesSorted = false;
return T;
}
/// Get a BTFTagAttributed type for the btf_type_tag attribute.
QualType getBTFTagAttributedType(const BTFTypeTagAttr *BTFAttr,
QualType WrappedType) {
return sema.Context.getBTFTagAttributedType(BTFAttr, WrappedType);
}
/// Completely replace the \c auto in \p TypeWithAuto by
/// \p Replacement. Also replace \p TypeWithAuto in \c TypeAttrPair if
/// necessary.
QualType ReplaceAutoType(QualType TypeWithAuto, QualType Replacement) {
QualType T = sema.ReplaceAutoType(TypeWithAuto, Replacement);
if (auto *AttrTy = TypeWithAuto->getAs<AttributedType>()) {
// An attributed type should still be an attributed type after replacement.
auto *NewAttrTy = cast<AttributedType>(T.getTypePtr());
for (TypeAttrPair &A : AttrsForTypes) {
if (A.first == AttrTy)
A.first = NewAttrTy;
}
AttrsForTypesSorted = false;
}
return T;
}
/// Extract and remove the Attr* for a given attributed type.
const Attr *takeAttrForAttributedType(const AttributedType *AT) {
if (!AttrsForTypesSorted) {
llvm::stable_sort(AttrsForTypes, llvm::less_first());
AttrsForTypesSorted = true;
}
// FIXME: This is quadratic if we have lots of reuses of the same
// attributed type.
for (auto It = std::partition_point(
AttrsForTypes.begin(), AttrsForTypes.end(),
[=](const TypeAttrPair &A) { return A.first < AT; });
It != AttrsForTypes.end() && It->first == AT; ++It) {
if (It->second) {
const Attr *Result = It->second;
It->second = nullptr;
return Result;
}
}
llvm_unreachable("no Attr* for AttributedType*");
}
SourceLocation
getExpansionLocForMacroQualifiedType(const MacroQualifiedType *MQT) const {
auto FoundLoc = LocsForMacros.find(MQT);
assert(FoundLoc != LocsForMacros.end() &&
"Unable to find macro expansion location for MacroQualifedType");
return FoundLoc->second;
}
void setExpansionLocForMacroQualifiedType(const MacroQualifiedType *MQT,
SourceLocation Loc) {
LocsForMacros[MQT] = Loc;
}
void setParsedNoDeref(bool parsed) { parsedNoDeref = parsed; }
bool didParseNoDeref() const { return parsedNoDeref; }
~TypeProcessingState() {
if (savedAttrs.empty())
return;
getMutableDeclSpec().getAttributes().clearListOnly();
for (ParsedAttr *AL : savedAttrs)
getMutableDeclSpec().getAttributes().addAtEnd(AL);
}
private:
DeclSpec &getMutableDeclSpec() const {
return const_cast<DeclSpec&>(declarator.getDeclSpec());
}
};
} // end anonymous namespace
static void moveAttrFromListToList(ParsedAttr &attr,
ParsedAttributesView &fromList,
ParsedAttributesView &toList) {
fromList.remove(&attr);
toList.addAtEnd(&attr);
}
/// The location of a type attribute.
enum TypeAttrLocation {
/// The attribute is in the decl-specifier-seq.
TAL_DeclSpec,
/// The attribute is part of a DeclaratorChunk.
TAL_DeclChunk,
/// The attribute is immediately after the declaration's name.
TAL_DeclName
};
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs);
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type);
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type);
static bool handleObjCGCTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type);
static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type);
static bool handleObjCPointerTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type) {
if (attr.getKind() == ParsedAttr::AT_ObjCGC)
return handleObjCGCTypeAttr(state, attr, type);
assert(attr.getKind() == ParsedAttr::AT_ObjCOwnership);
return handleObjCOwnershipTypeAttr(state, attr, type);
}
/// Given the index of a declarator chunk, check whether that chunk
/// directly specifies the return type of a function and, if so, find
/// an appropriate place for it.
///
/// \param i - a notional index which the search will start
/// immediately inside
///
/// \param onlyBlockPointers Whether we should only look into block
/// pointer types (vs. all pointer types).
static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator,
unsigned i,
bool onlyBlockPointers) {
assert(i <= declarator.getNumTypeObjects());
DeclaratorChunk *result = nullptr;
// First, look inwards past parens for a function declarator.
for (; i != 0; --i) {
DeclaratorChunk &fnChunk = declarator.getTypeObject(i-1);
switch (fnChunk.Kind) {
case DeclaratorChunk::Paren:
continue;
// If we find anything except a function, bail out.
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Array:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
return result;
// If we do find a function declarator, scan inwards from that,
// looking for a (block-)pointer declarator.
case DeclaratorChunk::Function:
for (--i; i != 0; --i) {
DeclaratorChunk &ptrChunk = declarator.getTypeObject(i-1);
switch (ptrChunk.Kind) {
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
case DeclaratorChunk::Function:
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pipe:
continue;
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pointer:
if (onlyBlockPointers)
continue;
LLVM_FALLTHROUGH;
case DeclaratorChunk::BlockPointer:
result = &ptrChunk;
goto continue_outer;
}
llvm_unreachable("bad declarator chunk kind");
}
// If we run out of declarators doing that, we're done.
return result;
}
llvm_unreachable("bad declarator chunk kind");
// Okay, reconsider from our new point.
continue_outer: ;
}
// Ran out of chunks, bail out.
return result;
}
/// Given that an objc_gc attribute was written somewhere on a
/// declaration *other* than on the declarator itself (for which, use
/// distributeObjCPointerTypeAttrFromDeclarator), and given that it
/// didn't apply in whatever position it was written in, try to move
/// it to a more appropriate position.
static void distributeObjCPointerTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType type) {
Declarator &declarator = state.getDeclarator();
// Move it to the outermost normal or block pointer declarator.
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
switch (chunk.Kind) {
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer: {
// But don't move an ARC ownership attribute to the return type
// of a block.
DeclaratorChunk *destChunk = nullptr;
if (state.isProcessingDeclSpec() &&
attr.getKind() == ParsedAttr::AT_ObjCOwnership)
destChunk = maybeMovePastReturnType(declarator, i - 1,
/*onlyBlockPointers=*/true);
if (!destChunk) destChunk = &chunk;
moveAttrFromListToList(attr, state.getCurrentAttributes(),
destChunk->getAttrs());
return;
}
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
continue;
// We may be starting at the return type of a block.
case DeclaratorChunk::Function:
if (state.isProcessingDeclSpec() &&
attr.getKind() == ParsedAttr::AT_ObjCOwnership) {
if (DeclaratorChunk *dest = maybeMovePastReturnType(
declarator, i,
/*onlyBlockPointers=*/true)) {
moveAttrFromListToList(attr, state.getCurrentAttributes(),
dest->getAttrs());
return;
}
}
goto error;
// Don't walk through these.
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
goto error;
}
}
error:
diagnoseBadTypeAttribute(state.getSema(), attr, type);
}
/// Distribute an objc_gc type attribute that was written on the
/// declarator.
static void distributeObjCPointerTypeAttrFromDeclarator(
TypeProcessingState &state, ParsedAttr &attr, QualType &declSpecType) {
Declarator &declarator = state.getDeclarator();
// objc_gc goes on the innermost pointer to something that's not a
// pointer.
unsigned innermost = -1U;
bool considerDeclSpec = true;
for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
innermost = i;
continue;
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
case DeclaratorChunk::Pipe:
continue;
case DeclaratorChunk::Function:
considerDeclSpec = false;
goto done;
}
}
done:
// That might actually be the decl spec if we weren't blocked by
// anything in the declarator.
if (considerDeclSpec) {
if (handleObjCPointerTypeAttr(state, attr, declSpecType)) {
// Splice the attribute into the decl spec. Prevents the
// attribute from being applied multiple times and gives
// the source-location-filler something to work with.
state.saveDeclSpecAttrs();
declarator.getMutableDeclSpec().getAttributes().takeOneFrom(
declarator.getAttributes(), &attr);
return;
}
}
// Otherwise, if we found an appropriate chunk, splice the attribute
// into it.
if (innermost != -1U) {
moveAttrFromListToList(attr, declarator.getAttributes(),
declarator.getTypeObject(innermost).getAttrs());
return;
}
// Otherwise, diagnose when we're done building the type.
declarator.getAttributes().remove(&attr);
state.addIgnoredTypeAttr(attr);
}
/// A function type attribute was written somewhere in a declaration
/// *other* than on the declarator itself or in the decl spec. Given
/// that it didn't apply in whatever position it was written in, try
/// to move it to a more appropriate position.
static void distributeFunctionTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType type) {
Declarator &declarator = state.getDeclarator();
// Try to push the attribute from the return type of a function to
// the function itself.
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
switch (chunk.Kind) {
case DeclaratorChunk::Function:
moveAttrFromListToList(attr, state.getCurrentAttributes(),
chunk.getAttrs());
return;
case DeclaratorChunk::Paren:
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Array:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
continue;
}
}
diagnoseBadTypeAttribute(state.getSema(), attr, type);
}
/// Try to distribute a function type attribute to the innermost
/// function chunk or type. Returns true if the attribute was
/// distributed, false if no location was found.
static bool distributeFunctionTypeAttrToInnermost(
TypeProcessingState &state, ParsedAttr &attr,
ParsedAttributesView &attrList, QualType &declSpecType) {
Declarator &declarator = state.getDeclarator();
// Put it on the innermost function chunk, if there is one.
for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i);
if (chunk.Kind != DeclaratorChunk::Function) continue;
moveAttrFromListToList(attr, attrList, chunk.getAttrs());
return true;
}
return handleFunctionTypeAttr(state, attr, declSpecType);
}
/// A function type attribute was written in the decl spec. Try to
/// apply it somewhere.
static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
ParsedAttr &attr,
QualType &declSpecType) {
state.saveDeclSpecAttrs();
// Try to distribute to the innermost.
if (distributeFunctionTypeAttrToInnermost(
state, attr, state.getCurrentAttributes(), declSpecType))
return;
// If that failed, diagnose the bad attribute when the declarator is
// fully built.
state.addIgnoredTypeAttr(attr);
}
/// A function type attribute was written on the declarator or declaration.
/// Try to apply it somewhere.
/// `Attrs` is the attribute list containing the declaration (either of the
/// declarator or the declaration).
static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
ParsedAttr &attr,
QualType &declSpecType) {
Declarator &declarator = state.getDeclarator();
// Try to distribute to the innermost.
if (distributeFunctionTypeAttrToInnermost(
state, attr, declarator.getAttributes(), declSpecType))
return;
// If that failed, diagnose the bad attribute when the declarator is
// fully built.
declarator.getAttributes().remove(&attr);
state.addIgnoredTypeAttr(attr);
}
/// Given that there are attributes written on the declarator or declaration
/// itself, try to distribute any type attributes to the appropriate
/// declarator chunk.
///
/// These are attributes like the following:
/// int f ATTR;
/// int (f ATTR)();
/// but not necessarily this:
/// int f() ATTR;
///
/// `Attrs` is the attribute list containing the declaration (either of the
/// declarator or the declaration).
static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
QualType &declSpecType) {
// The called functions in this loop actually remove things from the current
// list, so iterating over the existing list isn't possible. Instead, make a
// non-owning copy and iterate over that.
ParsedAttributesView AttrsCopy{state.getDeclarator().getAttributes()};
for (ParsedAttr &attr : AttrsCopy) {
// Do not distribute [[]] attributes. They have strict rules for what
// they appertain to.
if (attr.isStandardAttributeSyntax())
continue;
switch (attr.getKind()) {
OBJC_POINTER_TYPE_ATTRS_CASELIST:
distributeObjCPointerTypeAttrFromDeclarator(state, attr, declSpecType);
break;
FUNCTION_TYPE_ATTRS_CASELIST:
distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType);
break;
MS_TYPE_ATTRS_CASELIST:
// Microsoft type attributes cannot go after the declarator-id.
continue;
NULLABILITY_TYPE_ATTRS_CASELIST:
// Nullability specifiers cannot go after the declarator-id.
// Objective-C __kindof does not get distributed.
case ParsedAttr::AT_ObjCKindOf:
continue;
default:
break;
}
}
}
/// Add a synthetic '()' to a block-literal declarator if it is
/// required, given the return type.
static void maybeSynthesizeBlockSignature(TypeProcessingState &state,
QualType declSpecType) {
Declarator &declarator = state.getDeclarator();
// First, check whether the declarator would produce a function,
// i.e. whether the innermost semantic chunk is a function.
if (declarator.isFunctionDeclarator()) {
// If so, make that declarator a prototyped declarator.
declarator.getFunctionTypeInfo().hasPrototype = true;
return;
}
// If there are any type objects, the type as written won't name a
// function, regardless of the decl spec type. This is because a
// block signature declarator is always an abstract-declarator, and
// abstract-declarators can't just be parentheses chunks. Therefore
// we need to build a function chunk unless there are no type
// objects and the decl spec type is a function.
if (!declarator.getNumTypeObjects() && declSpecType->isFunctionType())
return;
// Note that there *are* cases with invalid declarators where
// declarators consist solely of parentheses. In general, these
// occur only in failed efforts to make function declarators, so
// faking up the function chunk is still the right thing to do.
// Otherwise, we need to fake up a function declarator.
SourceLocation loc = declarator.getBeginLoc();
// ...and *prepend* it to the declarator.
SourceLocation NoLoc;
declarator.AddInnermostTypeInfo(DeclaratorChunk::getFunction(
/*HasProto=*/true,
/*IsAmbiguous=*/false,
/*LParenLoc=*/NoLoc,
/*ArgInfo=*/nullptr,
/*NumParams=*/0,
/*EllipsisLoc=*/NoLoc,
/*RParenLoc=*/NoLoc,
/*RefQualifierIsLvalueRef=*/true,
/*RefQualifierLoc=*/NoLoc,
/*MutableLoc=*/NoLoc, EST_None,
/*ESpecRange=*/SourceRange(),
/*Exceptions=*/nullptr,
/*ExceptionRanges=*/nullptr,
/*NumExceptions=*/0,
/*NoexceptExpr=*/nullptr,
/*ExceptionSpecTokens=*/nullptr,
/*DeclsInPrototype=*/None, loc, loc, declarator));
// For consistency, make sure the state still has us as processing
// the decl spec.
assert(state.getCurrentChunkIndex() == declarator.getNumTypeObjects() - 1);
state.setCurrentChunkIndex(declarator.getNumTypeObjects());
}
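// Illustrative: a block literal written as ^{ return 42; } has no function
// chunk of its own, so the synthetic '()' prototype above is prepended; a
// literal written as ^(int x) { ... } already has one and is only marked as
// prototyped.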
static void diagnoseAndRemoveTypeQualifiers(Sema &S, const DeclSpec &DS,
unsigned &TypeQuals,
QualType TypeSoFar,
unsigned RemoveTQs,
unsigned DiagID) {
// If this occurs outside a template instantiation, warn the user about
// it; they probably didn't mean to specify a redundant qualifier.
typedef std::pair<DeclSpec::TQ, SourceLocation> QualLoc;
for (QualLoc Qual : {QualLoc(DeclSpec::TQ_const, DS.getConstSpecLoc()),
QualLoc(DeclSpec::TQ_restrict, DS.getRestrictSpecLoc()),
QualLoc(DeclSpec::TQ_volatile, DS.getVolatileSpecLoc()),
QualLoc(DeclSpec::TQ_atomic, DS.getAtomicSpecLoc())}) {
if (!(RemoveTQs & Qual.first))
continue;
if (!S.inTemplateInstantiation()) {
if (TypeQuals & Qual.first)
S.Diag(Qual.second, DiagID)
<< DeclSpec::getSpecifierName(Qual.first) << TypeSoFar
<< FixItHint::CreateRemoval(Qual.second);
}
TypeQuals &= ~Qual.first;
}
}
/// Return true if this is an omitted block return type. Also check type
/// attributes and type qualifiers when returning true.
static bool checkOmittedBlockReturnType(Sema &S, Declarator &declarator,
QualType Result) {
if (!isOmittedBlockReturnType(declarator))
return false;
// Warn if we see type attributes for omitted return type on a block literal.
SmallVector<ParsedAttr *, 2> ToBeRemoved;
for (ParsedAttr &AL : declarator.getMutableDeclSpec().getAttributes()) {
if (AL.isInvalid() || !AL.isTypeAttr())
continue;
S.Diag(AL.getLoc(),
diag::warn_block_literal_attributes_on_omitted_return_type)
<< AL;
ToBeRemoved.push_back(&AL);
}
// Remove bad attributes from the list.
for (ParsedAttr *AL : ToBeRemoved)
declarator.getMutableDeclSpec().getAttributes().remove(AL);
// Warn if we see type qualifiers for omitted return type on a block literal.
const DeclSpec &DS = declarator.getDeclSpec();
unsigned TypeQuals = DS.getTypeQualifiers();
diagnoseAndRemoveTypeQualifiers(S, DS, TypeQuals, Result, (unsigned)-1,
diag::warn_block_literal_qualifiers_on_omitted_return_type);
declarator.getMutableDeclSpec().ClearTypeQualifiers();
return true;
}
/// Apply Objective-C type arguments to the given type.
static QualType applyObjCTypeArgs(Sema &S, SourceLocation loc, QualType type,
ArrayRef<TypeSourceInfo *> typeArgs,
SourceRange typeArgsRange,
bool failOnError = false) {
// We can only apply type arguments to an Objective-C class type.
const auto *objcObjectType = type->getAs<ObjCObjectType>();
if (!objcObjectType || !objcObjectType->getInterface()) {
S.Diag(loc, diag::err_objc_type_args_non_class)
<< type
<< typeArgsRange;
if (failOnError)
return QualType();
return type;
}
// The class type must be parameterized.
ObjCInterfaceDecl *objcClass = objcObjectType->getInterface();
ObjCTypeParamList *typeParams = objcClass->getTypeParamList();
if (!typeParams) {
S.Diag(loc, diag::err_objc_type_args_non_parameterized_class)
<< objcClass->getDeclName()
<< FixItHint::CreateRemoval(typeArgsRange);
if (failOnError)
return QualType();
return type;
}
// The type must not already be specialized.
if (objcObjectType->isSpecialized()) {
S.Diag(loc, diag::err_objc_type_args_specialized_class)
<< type
<< FixItHint::CreateRemoval(typeArgsRange);
if (failOnError)
return QualType();
return type;
}
// Check the type arguments.
SmallVector<QualType, 4> finalTypeArgs;
unsigned numTypeParams = typeParams->size();
bool anyPackExpansions = false;
for (unsigned i = 0, n = typeArgs.size(); i != n; ++i) {
TypeSourceInfo *typeArgInfo = typeArgs[i];
QualType typeArg = typeArgInfo->getType();
// Type arguments cannot have explicit qualifiers or nullability.
// We ignore indirect sources of these, e.g. behind typedefs or
// template arguments.
if (TypeLoc qual = typeArgInfo->getTypeLoc().findExplicitQualifierLoc()) {
bool diagnosed = false;
SourceRange rangeToRemove;
if (auto attr = qual.getAs<AttributedTypeLoc>()) {
rangeToRemove = attr.getLocalSourceRange();
if (attr.getTypePtr()->getImmediateNullability()) {
typeArg = attr.getTypePtr()->getModifiedType();
S.Diag(attr.getBeginLoc(),
diag::err_objc_type_arg_explicit_nullability)
<< typeArg << FixItHint::CreateRemoval(rangeToRemove);
diagnosed = true;
}
}
if (!diagnosed) {
S.Diag(qual.getBeginLoc(), diag::err_objc_type_arg_qualified)
<< typeArg << typeArg.getQualifiers().getAsString()
<< FixItHint::CreateRemoval(rangeToRemove);
}
}
// Remove qualifiers even if they're non-local.
typeArg = typeArg.getUnqualifiedType();
finalTypeArgs.push_back(typeArg);
if (typeArg->getAs<PackExpansionType>())
anyPackExpansions = true;
// Find the corresponding type parameter, if there is one.
ObjCTypeParamDecl *typeParam = nullptr;
if (!anyPackExpansions) {
if (i < numTypeParams) {
typeParam = typeParams->begin()[i];
} else {
// Too many arguments.
S.Diag(loc, diag::err_objc_type_args_wrong_arity)
<< false
<< objcClass->getDeclName()
<< (unsigned)typeArgs.size()
<< numTypeParams;
S.Diag(objcClass->getLocation(), diag::note_previous_decl)
<< objcClass;
if (failOnError)
return QualType();
return type;
}
}
// Objective-C object pointer types must be substitutable for the bounds.
if (const auto *typeArgObjC = typeArg->getAs<ObjCObjectPointerType>()) {
// If we don't have a type parameter to match against, assume
// everything is fine. There was a prior pack expansion that
// means we won't be able to match anything.
if (!typeParam) {
assert(anyPackExpansions && "Too many arguments?");
continue;
}
// Retrieve the bound.
QualType bound = typeParam->getUnderlyingType();
const auto *boundObjC = bound->getAs<ObjCObjectPointerType>();
// Determine whether the type argument is substitutable for the bound.
if (typeArgObjC->isObjCIdType()) {
// When the type argument is 'id', the only acceptable type
// parameter bound is 'id'.
if (boundObjC->isObjCIdType())
continue;
} else if (S.Context.canAssignObjCInterfaces(boundObjC, typeArgObjC)) {
// Otherwise, we follow the assignability rules.
continue;
}
// Diagnose the mismatch.
S.Diag(typeArgInfo->getTypeLoc().getBeginLoc(),
diag::err_objc_type_arg_does_not_match_bound)
<< typeArg << bound << typeParam->getDeclName();
S.Diag(typeParam->getLocation(), diag::note_objc_type_param_here)
<< typeParam->getDeclName();
if (failOnError)
return QualType();
return type;
}
// Block pointer types are permitted for unqualified 'id' bounds.
if (typeArg->isBlockPointerType()) {
// If we don't have a type parameter to match against, assume
// everything is fine. There was a prior pack expansion that
// means we won't be able to match anything.
if (!typeParam) {
assert(anyPackExpansions && "Too many arguments?");
continue;
}
// Retrieve the bound.
QualType bound = typeParam->getUnderlyingType();
if (bound->isBlockCompatibleObjCPointerType(S.Context))
continue;
// Diagnose the mismatch.
S.Diag(typeArgInfo->getTypeLoc().getBeginLoc(),
diag::err_objc_type_arg_does_not_match_bound)
<< typeArg << bound << typeParam->getDeclName();
S.Diag(typeParam->getLocation(), diag::note_objc_type_param_here)
<< typeParam->getDeclName();
if (failOnError)
return QualType();
return type;
}
// Dependent types will be checked at instantiation time.
if (typeArg->isDependentType()) {
continue;
}
// Diagnose non-id-compatible type arguments.
S.Diag(typeArgInfo->getTypeLoc().getBeginLoc(),
diag::err_objc_type_arg_not_id_compatible)
<< typeArg << typeArgInfo->getTypeLoc().getSourceRange();
if (failOnError)
return QualType();
return type;
}
// Make sure we didn't have the wrong number of arguments.
if (!anyPackExpansions && finalTypeArgs.size() != numTypeParams) {
S.Diag(loc, diag::err_objc_type_args_wrong_arity)
<< (typeArgs.size() < typeParams->size())
<< objcClass->getDeclName()
<< (unsigned)finalTypeArgs.size()
<< (unsigned)numTypeParams;
S.Diag(objcClass->getLocation(), diag::note_previous_decl)
<< objcClass;
if (failOnError)
return QualType();
return type;
}
// Success. Form the specialized type.
return S.Context.getObjCObjectType(type, finalTypeArgs, { }, false);
}
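// Illustrative: for a specialization such as NSArray<NSString *> *, the single
// type argument NSString * is checked for substitutability against the bound
// of NSArray's type parameter (an 'id'-compatible Objective-C pointer by
// default), and the specialized ObjCObjectType is formed on success.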
QualType Sema::BuildObjCTypeParamType(const ObjCTypeParamDecl *Decl,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc,
bool FailOnError) {
QualType Result = QualType(Decl->getTypeForDecl(), 0);
if (!Protocols.empty()) {
bool HasError;
Result = Context.applyObjCProtocolQualifiers(Result, Protocols,
HasError);
if (HasError) {
Diag(SourceLocation(), diag::err_invalid_protocol_qualifiers)
<< SourceRange(ProtocolLAngleLoc, ProtocolRAngleLoc);
if (FailOnError) Result = QualType();
}
if (FailOnError && Result.isNull())
return QualType();
}
return Result;
}
QualType Sema::BuildObjCObjectType(QualType BaseType,
SourceLocation Loc,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<TypeSourceInfo *> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<ObjCProtocolDecl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc,
bool FailOnError) {
QualType Result = BaseType;
if (!TypeArgs.empty()) {
Result = applyObjCTypeArgs(*this, Loc, Result, TypeArgs,
SourceRange(TypeArgsLAngleLoc,
TypeArgsRAngleLoc),
FailOnError);
if (FailOnError && Result.isNull())
return QualType();
}
if (!Protocols.empty()) {
bool HasError;
Result = Context.applyObjCProtocolQualifiers(Result, Protocols,
HasError);
if (HasError) {
Diag(Loc, diag::err_invalid_protocol_qualifiers)
<< SourceRange(ProtocolLAngleLoc, ProtocolRAngleLoc);
if (FailOnError) Result = QualType();
}
if (FailOnError && Result.isNull())
return QualType();
}
return Result;
}
TypeResult Sema::actOnObjCProtocolQualifierType(
SourceLocation lAngleLoc,
ArrayRef<Decl *> protocols,
ArrayRef<SourceLocation> protocolLocs,
SourceLocation rAngleLoc) {
// Form id<protocol-list>.
QualType Result = Context.getObjCObjectType(
Context.ObjCBuiltinIdTy, { },
llvm::makeArrayRef(
(ObjCProtocolDecl * const *)protocols.data(),
protocols.size()),
false);
Result = Context.getObjCObjectPointerType(Result);
TypeSourceInfo *ResultTInfo = Context.CreateTypeSourceInfo(Result);
TypeLoc ResultTL = ResultTInfo->getTypeLoc();
auto ObjCObjectPointerTL = ResultTL.castAs<ObjCObjectPointerTypeLoc>();
ObjCObjectPointerTL.setStarLoc(SourceLocation()); // implicit
auto ObjCObjectTL = ObjCObjectPointerTL.getPointeeLoc()
.castAs<ObjCObjectTypeLoc>();
ObjCObjectTL.setHasBaseTypeAsWritten(false);
ObjCObjectTL.getBaseLoc().initialize(Context, SourceLocation());
// No type arguments.
ObjCObjectTL.setTypeArgsLAngleLoc(SourceLocation());
ObjCObjectTL.setTypeArgsRAngleLoc(SourceLocation());
// Fill in protocol qualifiers.
ObjCObjectTL.setProtocolLAngleLoc(lAngleLoc);
ObjCObjectTL.setProtocolRAngleLoc(rAngleLoc);
for (unsigned i = 0, n = protocols.size(); i != n; ++i)
ObjCObjectTL.setProtocolLoc(i, protocolLocs[i]);
// We're done. Return the completed type to the parser.
return CreateParsedType(Result, ResultTInfo);
}
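// Illustrative: a bare protocol-qualifier list such as <NSCopying, NSCoding>
// comes through here and is built as id<NSCopying, NSCoding>: an Objective-C
// object pointer whose base is the builtin 'id' with the listed protocol
// qualifiers and no type arguments.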
TypeResult Sema::actOnObjCTypeArgsAndProtocolQualifiers(
Scope *S,
SourceLocation Loc,
ParsedType BaseType,
SourceLocation TypeArgsLAngleLoc,
ArrayRef<ParsedType> TypeArgs,
SourceLocation TypeArgsRAngleLoc,
SourceLocation ProtocolLAngleLoc,
ArrayRef<Decl *> Protocols,
ArrayRef<SourceLocation> ProtocolLocs,
SourceLocation ProtocolRAngleLoc) {
TypeSourceInfo *BaseTypeInfo = nullptr;
QualType T = GetTypeFromParser(BaseType, &BaseTypeInfo);
if (T.isNull())
return true;
// Handle missing type-source info.
if (!BaseTypeInfo)
BaseTypeInfo = Context.getTrivialTypeSourceInfo(T, Loc);
// Extract type arguments.
SmallVector<TypeSourceInfo *, 4> ActualTypeArgInfos;
for (unsigned i = 0, n = TypeArgs.size(); i != n; ++i) {
TypeSourceInfo *TypeArgInfo = nullptr;
QualType TypeArg = GetTypeFromParser(TypeArgs[i], &TypeArgInfo);
if (TypeArg.isNull()) {
ActualTypeArgInfos.clear();
break;
}
assert(TypeArgInfo && "No type source info?");
ActualTypeArgInfos.push_back(TypeArgInfo);
}
// Build the object type.
QualType Result = BuildObjCObjectType(
T, BaseTypeInfo->getTypeLoc().getSourceRange().getBegin(),
TypeArgsLAngleLoc, ActualTypeArgInfos, TypeArgsRAngleLoc,
ProtocolLAngleLoc,
llvm::makeArrayRef((ObjCProtocolDecl * const *)Protocols.data(),
Protocols.size()),
ProtocolLocs, ProtocolRAngleLoc,
/*FailOnError=*/false);
if (Result == T)
return BaseType;
// Create source information for this type.
TypeSourceInfo *ResultTInfo = Context.CreateTypeSourceInfo(Result);
TypeLoc ResultTL = ResultTInfo->getTypeLoc();
// For id<Proto1, Proto2> or Class<Proto1, Proto2>, we'll have an
// object pointer type. Fill in source information for it.
if (auto ObjCObjectPointerTL = ResultTL.getAs<ObjCObjectPointerTypeLoc>()) {
// The '*' is implicit.
ObjCObjectPointerTL.setStarLoc(SourceLocation());
ResultTL = ObjCObjectPointerTL.getPointeeLoc();
}
if (auto OTPTL = ResultTL.getAs<ObjCTypeParamTypeLoc>()) {
// Protocol qualifier information.
if (OTPTL.getNumProtocols() > 0) {
assert(OTPTL.getNumProtocols() == Protocols.size());
OTPTL.setProtocolLAngleLoc(ProtocolLAngleLoc);
OTPTL.setProtocolRAngleLoc(ProtocolRAngleLoc);
for (unsigned i = 0, n = Protocols.size(); i != n; ++i)
OTPTL.setProtocolLoc(i, ProtocolLocs[i]);
}
// We're done. Return the completed type to the parser.
return CreateParsedType(Result, ResultTInfo);
}
auto ObjCObjectTL = ResultTL.castAs<ObjCObjectTypeLoc>();
// Type argument information.
if (ObjCObjectTL.getNumTypeArgs() > 0) {
assert(ObjCObjectTL.getNumTypeArgs() == ActualTypeArgInfos.size());
ObjCObjectTL.setTypeArgsLAngleLoc(TypeArgsLAngleLoc);
ObjCObjectTL.setTypeArgsRAngleLoc(TypeArgsRAngleLoc);
for (unsigned i = 0, n = ActualTypeArgInfos.size(); i != n; ++i)
ObjCObjectTL.setTypeArgTInfo(i, ActualTypeArgInfos[i]);
} else {
ObjCObjectTL.setTypeArgsLAngleLoc(SourceLocation());
ObjCObjectTL.setTypeArgsRAngleLoc(SourceLocation());
}
// Protocol qualifier information.
if (ObjCObjectTL.getNumProtocols() > 0) {
assert(ObjCObjectTL.getNumProtocols() == Protocols.size());
ObjCObjectTL.setProtocolLAngleLoc(ProtocolLAngleLoc);
ObjCObjectTL.setProtocolRAngleLoc(ProtocolRAngleLoc);
for (unsigned i = 0, n = Protocols.size(); i != n; ++i)
ObjCObjectTL.setProtocolLoc(i, ProtocolLocs[i]);
} else {
ObjCObjectTL.setProtocolLAngleLoc(SourceLocation());
ObjCObjectTL.setProtocolRAngleLoc(SourceLocation());
}
// Base type.
ObjCObjectTL.setHasBaseTypeAsWritten(true);
if (ObjCObjectTL.getType() == T)
ObjCObjectTL.getBaseLoc().initializeFullCopy(BaseTypeInfo->getTypeLoc());
else
ObjCObjectTL.getBaseLoc().initialize(Context, Loc);
// We're done. Return the completed type to the parser.
return CreateParsedType(Result, ResultTInfo);
}
static OpenCLAccessAttr::Spelling
getImageAccess(const ParsedAttributesView &Attrs) {
for (const ParsedAttr &AL : Attrs)
if (AL.getKind() == ParsedAttr::AT_OpenCLAccess)
return static_cast<OpenCLAccessAttr::Spelling>(AL.getSemanticSpelling());
return OpenCLAccessAttr::Keyword_read_only;
}
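// Illustrative: for an OpenCL image parameter such as
// '__read_only image2d_t img', the access qualifier is taken from the
// attribute list; when no access qualifier is written, read_only is assumed,
// matching the fallback above.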
/// Convert the specified declspec to the appropriate type
/// object.
/// \param state Specifies the declarator containing the declaration specifier
/// to be converted, along with other associated processing state.
/// \returns The type described by the declaration specifiers. This function
/// never returns null.
static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
// FIXME: Should move the logic from DeclSpec::Finish to here for validity
// checking.
Sema &S = state.getSema();
Declarator &declarator = state.getDeclarator();
DeclSpec &DS = declarator.getMutableDeclSpec();
SourceLocation DeclLoc = declarator.getIdentifierLoc();
if (DeclLoc.isInvalid())
DeclLoc = DS.getBeginLoc();
ASTContext &Context = S.Context;
QualType Result;
switch (DS.getTypeSpecType()) {
case DeclSpec::TST_void:
Result = Context.VoidTy;
break;
case DeclSpec::TST_char:
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified)
Result = Context.CharTy;
else if (DS.getTypeSpecSign() == TypeSpecifierSign::Signed)
Result = Context.SignedCharTy;
else {
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned &&
"Unknown TSS value");
Result = Context.UnsignedCharTy;
}
break;
case DeclSpec::TST_wchar:
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified)
Result = Context.WCharTy;
else if (DS.getTypeSpecSign() == TypeSpecifierSign::Signed) {
S.Diag(DS.getTypeSpecSignLoc(), diag::ext_wchar_t_sign_spec)
<< DS.getSpecifierName(DS.getTypeSpecType(),
Context.getPrintingPolicy());
Result = Context.getSignedWCharType();
} else {
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned &&
"Unknown TSS value");
S.Diag(DS.getTypeSpecSignLoc(), diag::ext_wchar_t_sign_spec)
<< DS.getSpecifierName(DS.getTypeSpecType(),
Context.getPrintingPolicy());
Result = Context.getUnsignedWCharType();
}
break;
case DeclSpec::TST_char8:
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Unknown TSS value");
Result = Context.Char8Ty;
break;
case DeclSpec::TST_char16:
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Unknown TSS value");
Result = Context.Char16Ty;
break;
case DeclSpec::TST_char32:
assert(DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Unknown TSS value");
Result = Context.Char32Ty;
break;
case DeclSpec::TST_unspecified:
// If this is a missing declspec in a block literal return context, then it
// is inferred from the return statements inside the block.
// The declspec is always missing in a lambda expr context; it is either
// specified with a trailing return type or inferred.
if (S.getLangOpts().CPlusPlus14 &&
declarator.getContext() == DeclaratorContext::LambdaExpr) {
// In C++1y, a lambda's implicit return type is 'auto'.
Result = Context.getAutoDeductType();
break;
} else if (declarator.getContext() == DeclaratorContext::LambdaExpr ||
checkOmittedBlockReturnType(S, declarator,
Context.DependentTy)) {
Result = Context.DependentTy;
break;
}
// Unspecified typespec defaults to int in C90. However, the C90 grammar
// [C90 6.5] only allows a decl-spec if there was *some* type-specifier,
// type-qualifier, or storage-class-specifier. If not, emit an extwarn.
// Note that the one exception to this is function definitions, which are
// allowed to be completely missing a declspec. The parser already handles
// this, though, by pretending to have seen an 'int' in this case.
if (S.getLangOpts().isImplicitIntRequired()) {
S.Diag(DeclLoc, diag::warn_missing_type_specifier)
<< DS.getSourceRange()
<< FixItHint::CreateInsertion(DS.getBeginLoc(), "int");
} else if (!DS.hasTypeSpecifier()) {
// C99 and C++ require a type specifier. For example, C99 6.7.2p2 says:
// "At least one type specifier shall be given in the declaration
// specifiers in each declaration, and in the specifier-qualifier list in
// each struct declaration and type name."
if (!S.getLangOpts().isImplicitIntAllowed() && !DS.isTypeSpecPipe()) {
S.Diag(DeclLoc, diag::err_missing_type_specifier)
<< DS.getSourceRange();
// When this occurs, often something is very broken with the value
// being declared, poison it as invalid so we don't get chains of
// errors.
declarator.setInvalidType(true);
} else if (S.getLangOpts().getOpenCLCompatibleVersion() >= 200 &&
DS.isTypeSpecPipe()) {
S.Diag(DeclLoc, diag::err_missing_actual_pipe_type)
<< DS.getSourceRange();
declarator.setInvalidType(true);
} else {
assert(S.getLangOpts().isImplicitIntAllowed() &&
"implicit int is disabled?");
S.Diag(DeclLoc, diag::ext_missing_type_specifier)
<< DS.getSourceRange()
<< FixItHint::CreateInsertion(DS.getBeginLoc(), "int");
}
}
LLVM_FALLTHROUGH;
case DeclSpec::TST_int: {
if (DS.getTypeSpecSign() != TypeSpecifierSign::Unsigned) {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Unspecified:
Result = Context.IntTy;
break;
case TypeSpecifierWidth::Short:
Result = Context.ShortTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.LongTy;
break;
case TypeSpecifierWidth::LongLong:
Result = Context.LongLongTy;
// 'long long' is a C99 or C++11 feature.
if (!S.getLangOpts().C99) {
if (S.getLangOpts().CPlusPlus)
S.Diag(DS.getTypeSpecWidthLoc(),
S.getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
else
S.Diag(DS.getTypeSpecWidthLoc(), diag::ext_c99_longlong);
}
break;
}
} else {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Unspecified:
Result = Context.UnsignedIntTy;
break;
case TypeSpecifierWidth::Short:
Result = Context.UnsignedShortTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.UnsignedLongTy;
break;
case TypeSpecifierWidth::LongLong:
Result = Context.UnsignedLongLongTy;
// 'long long' is a C99 or C++11 feature.
if (!S.getLangOpts().C99) {
if (S.getLangOpts().CPlusPlus)
S.Diag(DS.getTypeSpecWidthLoc(),
S.getLangOpts().CPlusPlus11 ?
diag::warn_cxx98_compat_longlong : diag::ext_cxx11_longlong);
else
S.Diag(DS.getTypeSpecWidthLoc(), diag::ext_c99_longlong);
}
break;
}
}
break;
}
case DeclSpec::TST_bitint: {
if (!S.Context.getTargetInfo().hasBitIntType())
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "_BitInt";
Result =
S.BuildBitIntType(DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned,
DS.getRepAsExpr(), DS.getBeginLoc());
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
}
case DeclSpec::TST_accum: {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Short:
Result = Context.ShortAccumTy;
break;
case TypeSpecifierWidth::Unspecified:
Result = Context.AccumTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.LongAccumTy;
break;
case TypeSpecifierWidth::LongLong:
llvm_unreachable("Unable to specify long long as _Accum width");
}
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned)
Result = Context.getCorrespondingUnsignedType(Result);
if (DS.isTypeSpecSat())
Result = Context.getCorrespondingSaturatedType(Result);
break;
}
case DeclSpec::TST_fract: {
switch (DS.getTypeSpecWidth()) {
case TypeSpecifierWidth::Short:
Result = Context.ShortFractTy;
break;
case TypeSpecifierWidth::Unspecified:
Result = Context.FractTy;
break;
case TypeSpecifierWidth::Long:
Result = Context.LongFractTy;
break;
case TypeSpecifierWidth::LongLong:
llvm_unreachable("Unable to specify long long as _Fract width");
}
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned)
Result = Context.getCorrespondingUnsignedType(Result);
if (DS.isTypeSpecSat())
Result = Context.getCorrespondingSaturatedType(Result);
break;
}
case DeclSpec::TST_int128:
if (!S.Context.getTargetInfo().hasInt128Type() &&
!(S.getLangOpts().SYCLIsDevice || S.getLangOpts().CUDAIsDevice ||
(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice)))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__int128";
if (DS.getTypeSpecSign() == TypeSpecifierSign::Unsigned)
Result = Context.UnsignedInt128Ty;
else
Result = Context.Int128Ty;
break;
case DeclSpec::TST_float16:
// CUDA host and device may have different _Float16 support, so do not
// diagnose _Float16 usage here to avoid false alarms.
// TODO: more precise diagnostics for CUDA.
if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "_Float16";
Result = Context.Float16Ty;
break;
case DeclSpec::TST_half: Result = Context.HalfTy; break;
case DeclSpec::TST_BFloat16:
if (!S.Context.getTargetInfo().hasBFloat16Type())
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__bf16";
Result = Context.BFloat16Ty;
break;
case DeclSpec::TST_float: Result = Context.FloatTy; break;
case DeclSpec::TST_double:
if (DS.getTypeSpecWidth() == TypeSpecifierWidth::Long)
Result = Context.LongDoubleTy;
else
Result = Context.DoubleTy;
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isSupported("cl_khr_fp64", S.getLangOpts()))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
<< 0 << Result
<< (S.getLangOpts().getOpenCLCompatibleVersion() == 300
? "cl_khr_fp64 and __opencl_c_fp64"
: "cl_khr_fp64");
else if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp64", S.getLangOpts()))
S.Diag(DS.getTypeSpecTypeLoc(), diag::ext_opencl_double_without_pragma);
}
break;
case DeclSpec::TST_float128:
if (!S.Context.getTargetInfo().hasFloat128Type() &&
!S.getLangOpts().SYCLIsDevice &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
<< "__float128";
Result = Context.Float128Ty;
break;
case DeclSpec::TST_ibm128:
if (!S.Context.getTargetInfo().hasIbm128Type() &&
!S.getLangOpts().SYCLIsDevice &&
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported) << "__ibm128";
Result = Context.Ibm128Ty;
break;
case DeclSpec::TST_bool:
Result = Context.BoolTy; // _Bool or bool
break;
case DeclSpec::TST_decimal32: // _Decimal32
case DeclSpec::TST_decimal64: // _Decimal64
case DeclSpec::TST_decimal128: // _Decimal128
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_decimal_unsupported);
Result = Context.IntTy;
declarator.setInvalidType(true);
break;
case DeclSpec::TST_class:
case DeclSpec::TST_enum:
case DeclSpec::TST_union:
case DeclSpec::TST_struct:
case DeclSpec::TST_interface: {
TagDecl *D = dyn_cast_or_null<TagDecl>(DS.getRepAsDecl());
if (!D) {
// This can happen in C++ with ambiguous lookups.
Result = Context.IntTy;
declarator.setInvalidType(true);
break;
}
// If the type is deprecated or unavailable, diagnose it.
S.DiagnoseUseOfDecl(D, DS.getTypeSpecTypeNameLoc());
assert(DS.getTypeSpecWidth() == TypeSpecifierWidth::Unspecified &&
DS.getTypeSpecComplex() == 0 &&
DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"No qualifiers on tag names!");
// TypeQuals handled by caller.
Result = Context.getTypeDeclType(D);
// In both C and C++, make an ElaboratedType.
ElaboratedTypeKeyword Keyword
= ElaboratedType::getKeywordForTypeSpec(DS.getTypeSpecType());
Result = S.getElaboratedType(Keyword, DS.getTypeSpecScope(), Result,
DS.isTypeSpecOwned() ? D : nullptr);
break;
}
case DeclSpec::TST_typename: {
assert(DS.getTypeSpecWidth() == TypeSpecifierWidth::Unspecified &&
DS.getTypeSpecComplex() == 0 &&
DS.getTypeSpecSign() == TypeSpecifierSign::Unspecified &&
"Can't handle qualifiers on typedef names yet!");
Result = S.GetTypeFromParser(DS.getRepAsType());
if (Result.isNull()) {
declarator.setInvalidType(true);
}
// TypeQuals handled by caller.
break;
}
case DeclSpec::TST_typeofType:
// FIXME: Preserve type source info.
Result = S.GetTypeFromParser(DS.getRepAsType());
assert(!Result.isNull() && "Didn't get a type for typeof?");
if (!Result->isDependentType())
if (const TagType *TT = Result->getAs<TagType>())
S.DiagnoseUseOfDecl(TT->getDecl(), DS.getTypeSpecTypeLoc());
// TypeQuals handled by caller.
Result = Context.getTypeOfType(Result);
break;
case DeclSpec::TST_typeofExpr: {
Expr *E = DS.getRepAsExpr();
assert(E && "Didn't get an expression for typeof?");
// TypeQuals handled by caller.
Result = S.BuildTypeofExprType(E);
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
}
case DeclSpec::TST_decltype: {
Expr *E = DS.getRepAsExpr();
assert(E && "Didn't get an expression for decltype?");
// TypeQuals handled by caller.
Result = S.BuildDecltypeType(E);
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
}
case DeclSpec::TST_underlyingType:
Result = S.GetTypeFromParser(DS.getRepAsType());
assert(!Result.isNull() && "Didn't get a type for __underlying_type?");
Result = S.BuildUnaryTransformType(Result,
UnaryTransformType::EnumUnderlyingType,
DS.getTypeSpecTypeLoc());
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
case DeclSpec::TST_auto:
case DeclSpec::TST_decltype_auto: {
auto AutoKW = DS.getTypeSpecType() == DeclSpec::TST_decltype_auto
? AutoTypeKeyword::DecltypeAuto
: AutoTypeKeyword::Auto;
ConceptDecl *TypeConstraintConcept = nullptr;
llvm::SmallVector<TemplateArgument, 8> TemplateArgs;
if (DS.isConstrainedAuto()) {
if (TemplateIdAnnotation *TemplateId = DS.getRepAsTemplateId()) {
TypeConstraintConcept =
cast<ConceptDecl>(TemplateId->Template.get().getAsTemplateDecl());
TemplateArgumentListInfo TemplateArgsInfo;
TemplateArgsInfo.setLAngleLoc(TemplateId->LAngleLoc);
TemplateArgsInfo.setRAngleLoc(TemplateId->RAngleLoc);
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
S.translateTemplateArguments(TemplateArgsPtr, TemplateArgsInfo);
for (const auto &ArgLoc : TemplateArgsInfo.arguments())
TemplateArgs.push_back(ArgLoc.getArgument());
} else {
declarator.setInvalidType(true);
}
}
Result = S.Context.getAutoType(QualType(), AutoKW,
/*IsDependent*/ false, /*IsPack=*/false,
TypeConstraintConcept, TemplateArgs);
break;
}
case DeclSpec::TST_auto_type:
Result = Context.getAutoType(QualType(), AutoTypeKeyword::GNUAutoType, false);
break;
case DeclSpec::TST_unknown_anytype:
Result = Context.UnknownAnyTy;
break;
case DeclSpec::TST_atomic:
Result = S.GetTypeFromParser(DS.getRepAsType());
assert(!Result.isNull() && "Didn't get a type for _Atomic?");
Result = S.BuildAtomicType(Result, DS.getTypeSpecTypeLoc());
if (Result.isNull()) {
Result = Context.IntTy;
declarator.setInvalidType(true);
}
break;
#define GENERIC_IMAGE_TYPE(ImgType, Id) \
case DeclSpec::TST_##ImgType##_t: \
switch (getImageAccess(DS.getAttributes())) { \
case OpenCLAccessAttr::Keyword_write_only: \
Result = Context.Id##WOTy; \
break; \
case OpenCLAccessAttr::Keyword_read_write: \
Result = Context.Id##RWTy; \
break; \
case OpenCLAccessAttr::Keyword_read_only: \
Result = Context.Id##ROTy; \
break; \
case OpenCLAccessAttr::SpellingNotCalculated: \
llvm_unreachable("Spelling not yet calculated"); \
} \
break;
#include "clang/Basic/OpenCLImageTypes.def"
case DeclSpec::TST_error:
Result = Context.IntTy;
declarator.setInvalidType(true);
break;
}
// FIXME: we want resulting declarations to be marked invalid, but claiming
// the type is invalid is too strong - e.g. it causes ActOnTypeName to return
// a null type.
if (Result->containsErrors())
declarator.setInvalidType();
if (S.getLangOpts().OpenCL) {
const auto &OpenCLOptions = S.getOpenCLOptions();
bool IsOpenCLC30Compatible =
S.getLangOpts().getOpenCLCompatibleVersion() == 300;
// OpenCL C v3.0 s6.3.3 - OpenCL image types require __opencl_c_images
// support.
// OpenCL C v3.0 s6.2.1 - OpenCL 3d image write types require support
// for OpenCL C 2.0, or OpenCL C 3.0 or newer and the
// __opencl_c_3d_image_writes feature. OpenCL C v3.0 API s4.2 - For devices
// that support OpenCL 3.0, cl_khr_3d_image_writes must be returned when and
// only when the optional feature is supported.
if ((Result->isImageType() || Result->isSamplerT()) &&
(IsOpenCLC30Compatible &&
!OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts()))) {
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
<< 0 << Result << "__opencl_c_images";
declarator.setInvalidType();
} else if (Result->isOCLImage3dWOType() &&
!OpenCLOptions.isSupported("cl_khr_3d_image_writes",
S.getLangOpts())) {
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension)
<< 0 << Result
<< (IsOpenCLC30Compatible
? "cl_khr_3d_image_writes and __opencl_c_3d_image_writes"
: "cl_khr_3d_image_writes");
declarator.setInvalidType();
}
}
bool IsFixedPointType = DS.getTypeSpecType() == DeclSpec::TST_accum ||
DS.getTypeSpecType() == DeclSpec::TST_fract;
// Only fixed-point types can be saturated.
if (DS.isTypeSpecSat() && !IsFixedPointType)
S.Diag(DS.getTypeSpecSatLoc(), diag::err_invalid_saturation_spec)
<< DS.getSpecifierName(DS.getTypeSpecType(),
Context.getPrintingPolicy());
// Handle complex types.
if (DS.getTypeSpecComplex() == DeclSpec::TSC_complex) {
if (S.getLangOpts().Freestanding)
S.Diag(DS.getTypeSpecComplexLoc(), diag::ext_freestanding_complex);
Result = Context.getComplexType(Result);
} else if (DS.isTypeAltiVecVector()) {
unsigned typeSize = static_cast<unsigned>(Context.getTypeSize(Result));
assert(typeSize > 0 && "type size for vector must be greater than 0 bits");
VectorType::VectorKind VecKind = VectorType::AltiVecVector;
if (DS.isTypeAltiVecPixel())
VecKind = VectorType::AltiVecPixel;
else if (DS.isTypeAltiVecBool())
VecKind = VectorType::AltiVecBool;
Result = Context.getVectorType(Result, 128/typeSize, VecKind);
}
// FIXME: Imaginary.
if (DS.getTypeSpecComplex() == DeclSpec::TSC_imaginary)
S.Diag(DS.getTypeSpecComplexLoc(), diag::err_imaginary_not_supported);
// Before we process any type attributes, synthesize a block literal
// function declarator if necessary.
if (declarator.getContext() == DeclaratorContext::BlockLiteral)
maybeSynthesizeBlockSignature(state, Result);
// Apply any type attributes from the decl spec. This may cause the
// list of type attributes to be temporarily saved while the type
// attributes are pushed around.
// Pipe attributes will be handled later (in GetFullTypeForDeclarator).
if (!DS.isTypeSpecPipe()) {
// We also apply declaration attributes that "slide" to the decl spec.
// Ordering can be important for attributes. The declaration attributes
// come syntactically before the decl spec attributes, so we process them
// in that order.
ParsedAttributesView SlidingAttrs;
for (ParsedAttr &AL : declarator.getDeclarationAttributes()) {
if (AL.slidesFromDeclToDeclSpecLegacyBehavior()) {
SlidingAttrs.addAtEnd(&AL);
// For standard syntax attributes, which would normally appertain to the
// declaration here, suggest moving them to the type instead. But only
// do this for our own vendor attributes; moving other vendors'
// attributes might hurt portability.
// There's one special case that we need to deal with here: The
// `MatrixType` attribute may only be used in a typedef declaration. If
// it's being used anywhere else, don't output the warning as
// ProcessDeclAttributes() will output an error anyway.
if (AL.isStandardAttributeSyntax() && AL.isClangScope() &&
!(AL.getKind() == ParsedAttr::AT_MatrixType &&
DS.getStorageClassSpec() != DeclSpec::SCS_typedef)) {
S.Diag(AL.getLoc(), diag::warn_type_attribute_deprecated_on_decl)
<< AL;
}
}
}
// During this call to processTypeAttrs(),
// TypeProcessingState::getCurrentAttributes() will erroneously return a
// reference to the DeclSpec attributes, rather than the declaration
// attributes. However, this doesn't matter, as getCurrentAttributes()
// is only called when distributing attributes from one attribute list
// to another. Declaration attributes are always C++11 attributes, and these
// are never distributed.
processTypeAttrs(state, Result, TAL_DeclSpec, SlidingAttrs);
processTypeAttrs(state, Result, TAL_DeclSpec, DS.getAttributes());
}
// Apply const/volatile/restrict qualifiers to T.
if (unsigned TypeQuals = DS.getTypeQualifiers()) {
// Warn about CV qualifiers on function types.
// C99 6.7.3p8:
// If the specification of a function type includes any type qualifiers,
// the behavior is undefined.
// C++11 [dcl.fct]p7:
// The effect of a cv-qualifier-seq in a function declarator is not the
// same as adding cv-qualification on top of the function type. In the
// latter case, the cv-qualifiers are ignored.
if (Result->isFunctionType()) {
diagnoseAndRemoveTypeQualifiers(
S, DS, TypeQuals, Result, DeclSpec::TQ_const | DeclSpec::TQ_volatile,
S.getLangOpts().CPlusPlus
? diag::warn_typecheck_function_qualifiers_ignored
: diag::warn_typecheck_function_qualifiers_unspecified);
// No diagnostic for 'restrict' or '_Atomic' applied to a
// function type; we'll diagnose those later, in BuildQualifiedType.
}
// C++11 [dcl.ref]p1:
// Cv-qualified references are ill-formed except when the
// cv-qualifiers are introduced through the use of a typedef-name
// or decltype-specifier, in which case the cv-qualifiers are ignored.
//
// There don't appear to be any other contexts in which a cv-qualified
// reference type could be formed, so the 'ill-formed' clause here appears
// never to apply in practice.
if (TypeQuals && Result->isReferenceType()) {
diagnoseAndRemoveTypeQualifiers(
S, DS, TypeQuals, Result,
DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic,
diag::warn_typecheck_reference_qualifiers);
}
// C90 6.5.3 constraints: "The same type qualifier shall not appear more
// than once in the same specifier-list or qualifier-list, either directly
// or via one or more typedefs."
if (!S.getLangOpts().C99 && !S.getLangOpts().CPlusPlus
&& TypeQuals & Result.getCVRQualifiers()) {
if (TypeQuals & DeclSpec::TQ_const && Result.isConstQualified()) {
S.Diag(DS.getConstSpecLoc(), diag::ext_duplicate_declspec)
<< "const";
}
if (TypeQuals & DeclSpec::TQ_volatile && Result.isVolatileQualified()) {
S.Diag(DS.getVolatileSpecLoc(), diag::ext_duplicate_declspec)
<< "volatile";
}
// C90 doesn't have restrict or _Atomic, so it doesn't force us to
// produce a warning in this case.
}
QualType Qualified = S.BuildQualifiedType(Result, DeclLoc, TypeQuals, &DS);
// If adding qualifiers fails, just use the unqualified type.
if (Qualified.isNull())
declarator.setInvalidType(true);
else
Result = Qualified;
}
assert(!Result.isNull() && "This function should not return a null type");
return Result;
}
static std::string getPrintableNameForEntity(DeclarationName Entity) {
if (Entity)
return Entity.getAsString();
return "type name";
}
static bool isDependentOrGNUAutoType(QualType T) {
if (T->isDependentType())
return true;
const auto *AT = dyn_cast<AutoType>(T);
return AT && AT->isGNUAutoType();
}
QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
Qualifiers Qs, const DeclSpec *DS) {
if (T.isNull())
return QualType();
// Ignore any attempt to form a cv-qualified reference.
if (T->isReferenceType()) {
Qs.removeConst();
Qs.removeVolatile();
}
// Enforce C99 6.7.3p2: "Types other than pointer types derived from
// object or incomplete types shall not be restrict-qualified."
if (Qs.hasRestrict()) {
unsigned DiagID = 0;
QualType ProblemTy;
if (T->isAnyPointerType() || T->isReferenceType() ||
T->isMemberPointerType()) {
QualType EltTy;
if (T->isObjCObjectPointerType())
EltTy = T;
else if (const MemberPointerType *PTy = T->getAs<MemberPointerType>())
EltTy = PTy->getPointeeType();
else
EltTy = T->getPointeeType();
// If we have a pointer or reference, the pointee must have an object
// incomplete type.
if (!EltTy->isIncompleteOrObjectType()) {
DiagID = diag::err_typecheck_invalid_restrict_invalid_pointee;
ProblemTy = EltTy;
}
} else if (!isDependentOrGNUAutoType(T)) {
// For an __auto_type variable, we may not have seen the initializer yet
// and so have no idea whether the underlying type is a pointer type or
// not.
DiagID = diag::err_typecheck_invalid_restrict_not_pointer;
ProblemTy = T;
}
if (DiagID) {
Diag(DS ? DS->getRestrictSpecLoc() : Loc, DiagID) << ProblemTy;
Qs.removeRestrict();
}
}
return Context.getQualifiedType(T, Qs);
}
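// Illustrative restrict declarations (not part of the original source),
// following the C99 6.7.3p2 rule enforced above:
//
//   int *restrict p;            // OK: pointer to an object type
//   void (*restrict fp)(void);  // error: the pointee is a function type
//                               // (err_typecheck_invalid_restrict_invalid_pointee)
//   int restrict i;             // error: err_typecheck_invalid_restrict_not_pointer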
QualType Sema::BuildQualifiedType(QualType T, SourceLocation Loc,
unsigned CVRAU, const DeclSpec *DS) {
if (T.isNull())
return QualType();
// Ignore any attempt to form a cv-qualified reference.
if (T->isReferenceType())
CVRAU &=
~(DeclSpec::TQ_const | DeclSpec::TQ_volatile | DeclSpec::TQ_atomic);
// Convert from DeclSpec::TQ to Qualifiers::TQ by just dropping TQ_atomic and
// TQ_unaligned.
unsigned CVR = CVRAU & ~(DeclSpec::TQ_atomic | DeclSpec::TQ_unaligned);
// C11 6.7.3/5:
// If the same qualifier appears more than once in the same
// specifier-qualifier-list, either directly or via one or more typedefs,
// the behavior is the same as if it appeared only once.
//
// It's not specified what happens when the _Atomic qualifier is applied to
// a type specified with the _Atomic specifier, but we assume that this
// should be treated as if the _Atomic qualifier appeared multiple times.
if (CVRAU & DeclSpec::TQ_atomic && !T->isAtomicType()) {
// C11 6.7.3/5:
// If other qualifiers appear along with the _Atomic qualifier in a
// specifier-qualifier-list, the resulting type is the so-qualified
// atomic type.
//
// Don't need to worry about array types here, since _Atomic can't be
// applied to such types.
SplitQualType Split = T.getSplitUnqualifiedType();
T = BuildAtomicType(QualType(Split.Ty, 0),
DS ? DS->getAtomicSpecLoc() : Loc);
if (T.isNull())
return T;
Split.Quals.addCVRQualifiers(CVR);
return BuildQualifiedType(T, Loc, Split.Quals);
}
Qualifiers Q = Qualifiers::fromCVRMask(CVR);
Q.setUnaligned(CVRAU & DeclSpec::TQ_unaligned);
return BuildQualifiedType(T, Loc, Q, DS);
}
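// For illustration (not in the original source), how the _Atomic handling
// above composes qualifiers in C11:
//
//   const _Atomic int x;     // the const applies to the atomic type,
//                            // yielding 'const _Atomic(int)'
//   _Atomic _Atomic(int) y;  // qualifier on an already-atomic type is
//                            // treated as if _Atomic appeared only once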
/// Build a paren type including \p T.
QualType Sema::BuildParenType(QualType T) {
return Context.getParenType(T);
}
/// Given that we're building a pointer or reference to the given type,
/// infer the ObjC lifetime qualifier for the pointee under ARC.
static QualType inferARCLifetimeForPointee(Sema &S, QualType type,
SourceLocation loc,
bool isReference) {
// Bail out if retention is unrequired or already specified.
if (!type->isObjCLifetimeType() ||
type.getObjCLifetime() != Qualifiers::OCL_None)
return type;
Qualifiers::ObjCLifetime implicitLifetime = Qualifiers::OCL_None;
// If the object type is const-qualified, we can safely use
// __unsafe_unretained. This is safe (because there are no read
// barriers), and it'll be safe to coerce anything but __weak* to
// the resulting type.
if (type.isConstQualified()) {
implicitLifetime = Qualifiers::OCL_ExplicitNone;
// Otherwise, check whether the static type does not require
// retaining. This currently only triggers for Class (possibly
// protocol-qualified, and arrays thereof).
} else if (type->isObjCARCImplicitlyUnretainedType()) {
implicitLifetime = Qualifiers::OCL_ExplicitNone;
// If we are in an unevaluated context, like sizeof, skip adding a
// qualification.
} else if (S.isUnevaluatedContext()) {
return type;
// If that failed, give an error and recover using __strong. __strong
// is the option most likely to prevent spurious second-order diagnostics,
// like when binding a reference to a field.
} else {
// These types can show up in private ivars in system headers, so
// we need this to not be an error in those cases. Instead we
// want to delay.
if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
S.DelayedDiagnostics.add(
sema::DelayedDiagnostic::makeForbiddenType(loc,
diag::err_arc_indirect_no_ownership, type, isReference));
} else {
S.Diag(loc, diag::err_arc_indirect_no_ownership) << type << isReference;
}
implicitLifetime = Qualifiers::OCL_Strong;
}
assert(implicitLifetime && "didn't infer any lifetime!");
Qualifiers qs;
qs.addObjCLifetime(implicitLifetime);
return S.Context.getQualifiedType(type, qs);
}
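// Illustrative inferences (not part of the original source) for a pointee
// type under ARC, matching the branches above:
//
//   id const *p;  // const pointee: __unsafe_unretained is inferred
//   Class *c;     // implicitly unretained static type: __unsafe_unretained
//   id *q;        // otherwise: err_arc_indirect_no_ownership is emitted
//                 // and the code recovers with an inferred __strong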
static std::string getFunctionQualifiersAsString(const FunctionProtoType *FnTy){
std::string Quals = FnTy->getMethodQuals().getAsString();
switch (FnTy->getRefQualifier()) {
case RQ_None:
break;
case RQ_LValue:
if (!Quals.empty())
Quals += ' ';
Quals += '&';
break;
case RQ_RValue:
if (!Quals.empty())
Quals += ' ';
Quals += "&&";
break;
}
return Quals;
}
namespace {
/// Kinds of declarator that cannot contain a qualified function type.
///
/// C++98 [dcl.fct]p4 / C++11 [dcl.fct]p6:
/// a function type with a cv-qualifier or a ref-qualifier can only appear
/// at the topmost level of a type.
///
/// Parens and member pointers are permitted. We don't diagnose array and
/// function declarators, because they don't allow function types at all.
///
/// The values of this enum are used in diagnostics.
enum QualifiedFunctionKind { QFK_BlockPointer, QFK_Pointer, QFK_Reference };
} // end anonymous namespace
/// Check whether the type T is a qualified function type, and if it is,
/// diagnose that it cannot be contained within the given kind of declarator.
static bool checkQualifiedFunction(Sema &S, QualType T, SourceLocation Loc,
QualifiedFunctionKind QFK) {
// Does T refer to a function type with a cv-qualifier or a ref-qualifier?
const FunctionProtoType *FPT = T->getAs<FunctionProtoType>();
if (!FPT ||
(FPT->getMethodQuals().empty() && FPT->getRefQualifier() == RQ_None))
return false;
S.Diag(Loc, diag::err_compound_qualified_function_type)
<< QFK << isa<FunctionType>(T.IgnoreParens()) << T
<< getFunctionQualifiersAsString(FPT);
return true;
}
bool Sema::CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc) {
const FunctionProtoType *FPT = T->getAs<FunctionProtoType>();
if (!FPT ||
(FPT->getMethodQuals().empty() && FPT->getRefQualifier() == RQ_None))
return false;
Diag(Loc, diag::err_qualified_function_typeid)
<< T << getFunctionQualifiersAsString(FPT);
return true;
}
// Helper to deduce addr space of a pointee type in OpenCL mode.
static QualType deduceOpenCLPointeeAddrSpace(Sema &S, QualType PointeeType) {
if (!PointeeType->isUndeducedAutoType() && !PointeeType->isDependentType() &&
!PointeeType->isSamplerT() &&
!PointeeType.hasAddressSpace())
PointeeType = S.getASTContext().getAddrSpaceQualType(
PointeeType, S.getASTContext().getDefaultOpenCLPointeeAddrSpace());
return PointeeType;
}
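// For illustration (not from the original source): in OpenCL mode, a pointee
// type with no explicit address space is placed in the target's default
// OpenCL pointee address space, so a declaration like
//
//   int *p;
//
// behaves roughly as if it were written
//
//   __generic int *p;
//
// (assuming the default pointee address space is generic, as in OpenCL 2.0).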
/// Build a pointer type.
///
/// \param T The type to which we'll be building a pointer.
///
/// \param Loc The location of the entity whose type involves this
/// pointer type or, if there is no such entity, the location of the
/// type that will have pointer type.
///
/// \param Entity The name of the entity that involves the pointer
/// type, if known.
///
/// \returns A suitable pointer type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildPointerType(QualType T,
SourceLocation Loc, DeclarationName Entity) {
if (T->isReferenceType()) {
// C++ 8.3.2p4: There shall be no ... pointers to references ...
Diag(Loc, diag::err_illegal_decl_pointer_to_reference)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (T->isFunctionType() && getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
getLangOpts())) {
Diag(Loc, diag::err_opencl_function_pointer) << /*pointer*/ 0;
return QualType();
}
if (getLangOpts().HLSL) {
Diag(Loc, diag::err_hlsl_pointers_unsupported) << 0;
return QualType();
}
if (checkQualifiedFunction(*this, T, Loc, QFK_Pointer))
return QualType();
assert(!T->isObjCObjectType() && "Should build ObjCObjectPointerType");
// In ARC, it is forbidden to build pointers to unqualified pointers.
if (getLangOpts().ObjCAutoRefCount)
T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ false);
if (getLangOpts().OpenCL)
T = deduceOpenCLPointeeAddrSpace(*this, T);
// Build the pointer type.
return Context.getPointerType(T);
}
/// Build a reference type.
///
/// \param T The type to which we'll be building a reference.
///
/// \param Loc The location of the entity whose type involves this
/// reference type or, if there is no such entity, the location of the
/// type that will have reference type.
///
/// \param Entity The name of the entity that involves the reference
/// type, if known.
///
/// \returns A suitable reference type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue,
SourceLocation Loc,
DeclarationName Entity) {
assert(Context.getCanonicalType(T) != Context.OverloadTy &&
"Unresolved overloaded function type");
// C++0x [dcl.ref]p6:
// If a typedef (7.1.3), a type template-parameter (14.3.1), or a
// decltype-specifier (7.1.6.2) denotes a type TR that is a reference to a
// type T, an attempt to create the type "lvalue reference to cv TR" creates
// the type "lvalue reference to T", while an attempt to create the type
// "rvalue reference to cv TR" creates the type TR.
bool LValueRef = SpelledAsLValue || T->getAs<LValueReferenceType>();
// C++ [dcl.ref]p4: There shall be no references to references.
//
// According to C++ DR 106, references to references are only
// diagnosed when they are written directly (e.g., "int & &"),
// but not when they happen via a typedef:
//
// typedef int& intref;
// typedef intref& intref2;
//
// Parser::ParseDeclaratorInternal diagnoses the case where
// references are written directly; here, we handle the
// collapsing of references-to-references as described in C++0x.
// DR 106 and 540 introduce reference-collapsing into C++98/03.
// C++ [dcl.ref]p1:
// A declarator that specifies the type "reference to cv void"
// is ill-formed.
if (T->isVoidType()) {
Diag(Loc, diag::err_reference_to_void);
return QualType();
}
if (getLangOpts().HLSL) {
Diag(Loc, diag::err_hlsl_pointers_unsupported) << 1;
return QualType();
}
if (checkQualifiedFunction(*this, T, Loc, QFK_Reference))
return QualType();
if (T->isFunctionType() && getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
getLangOpts())) {
Diag(Loc, diag::err_opencl_function_pointer) << /*reference*/ 1;
return QualType();
}
// In ARC, it is forbidden to build references to unqualified pointers.
if (getLangOpts().ObjCAutoRefCount)
T = inferARCLifetimeForPointee(*this, T, Loc, /*reference*/ true);
if (getLangOpts().OpenCL)
T = deduceOpenCLPointeeAddrSpace(*this, T);
// Handle restrict on references.
if (LValueRef)
return Context.getLValueReferenceType(T, SpelledAsLValue);
return Context.getRValueReferenceType(T);
}
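// Reference collapsing, illustrated (not from the original source) per the
// C++11 [dcl.ref]p6 rule cited above:
//
//   typedef int&  LRef;
//   typedef int&& RRef;
//   int n = 0;
//   LRef&  a = n;  // int&  (lvalue reference to lvalue reference)
//   LRef&& b = n;  // int&  (rvalue reference to lvalue reference collapses
//                  //        to an lvalue reference)
//   RRef&& c = 0;  // int&& (rvalue reference to rvalue reference)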
/// Build a Read-only Pipe type.
///
/// \param T The type to which we'll be building a Pipe.
///
/// \param Loc Currently unused.
///
/// \returns A suitable pipe type, if there are no errors. Otherwise, returns a
/// NULL type.
QualType Sema::BuildReadPipeType(QualType T, SourceLocation Loc) {
return Context.getReadPipeType(T);
}
/// Build a Write-only Pipe type.
///
/// \param T The type to which we'll be building a Pipe.
///
/// \param Loc Currently unused.
///
/// \returns A suitable pipe type, if there are no errors. Otherwise, returns a
/// NULL type.
QualType Sema::BuildWritePipeType(QualType T, SourceLocation Loc) {
return Context.getWritePipeType(T);
}
/// Build a bit-precise integer type.
///
/// \param IsUnsigned Boolean representing the signedness of the type.
///
/// \param BitWidth Size of this int type in bits, or an expression representing
/// that.
///
/// \param Loc Location of the keyword.
QualType Sema::BuildBitIntType(bool IsUnsigned, Expr *BitWidth,
SourceLocation Loc) {
if (BitWidth->isInstantiationDependent())
return Context.getDependentBitIntType(IsUnsigned, BitWidth);
llvm::APSInt Bits(32);
ExprResult ICE =
VerifyIntegerConstantExpression(BitWidth, &Bits, /*FIXME*/ AllowFold);
if (ICE.isInvalid())
return QualType();
size_t NumBits = Bits.getZExtValue();
if (!IsUnsigned && NumBits < 2) {
Diag(Loc, diag::err_bit_int_bad_size) << 0;
return QualType();
}
if (IsUnsigned && NumBits < 1) {
Diag(Loc, diag::err_bit_int_bad_size) << 1;
return QualType();
}
const TargetInfo &TI = getASTContext().getTargetInfo();
if (NumBits > TI.getMaxBitIntWidth()) {
Diag(Loc, diag::err_bit_int_max_size)
<< IsUnsigned << static_cast<uint64_t>(TI.getMaxBitIntWidth());
return QualType();
}
return Context.getBitIntType(IsUnsigned, NumBits);
}
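// Illustrative _BitInt declarations (not part of the original source)
// exercising the size checks above:
//
//   _BitInt(2) a;           // OK: the minimum signed width is 2 bits
//   unsigned _BitInt(1) b;  // OK: the minimum unsigned width is 1 bit
//   _BitInt(1) c;           // error: err_bit_int_bad_size (signed _BitInt
//                           // needs at least 2 bits)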
/// Check whether the specified array bound can be evaluated using the relevant
/// language rules. If so, returns the possibly-converted expression and sets
/// SizeVal to the size. If not, but the expression might be a VLA bound,
/// returns ExprResult(). Otherwise, produces a diagnostic and returns
/// ExprError().
static ExprResult checkArraySize(Sema &S, Expr *&ArraySize,
llvm::APSInt &SizeVal, unsigned VLADiag,
bool VLAIsError) {
if (S.getLangOpts().CPlusPlus14 &&
(VLAIsError ||
!ArraySize->getType()->isIntegralOrUnscopedEnumerationType())) {
// C++14 [dcl.array]p1:
// The constant-expression shall be a converted constant expression of
// type std::size_t.
//
// Don't apply this rule if we might be forming a VLA: in that case, we
// allow non-constant expressions and constant-folding. We only need to use
// the converted constant expression rules (to properly convert the source)
// when the source expression is of class type.
return S.CheckConvertedConstantExpression(
ArraySize, S.Context.getSizeType(), SizeVal, Sema::CCEK_ArrayBound);
}
// If the size is an ICE, it certainly isn't a VLA. If we're in a GNU mode
// (like gnu99, but not c99), accept any evaluatable value as an extension.
class VLADiagnoser : public Sema::VerifyICEDiagnoser {
public:
unsigned VLADiag;
bool VLAIsError;
bool IsVLA = false;
VLADiagnoser(unsigned VLADiag, bool VLAIsError)
: VLADiag(VLADiag), VLAIsError(VLAIsError) {}
Sema::SemaDiagnosticBuilder diagnoseNotICEType(Sema &S, SourceLocation Loc,
QualType T) override {
return S.Diag(Loc, diag::err_array_size_non_int) << T;
}
Sema::SemaDiagnosticBuilder diagnoseNotICE(Sema &S,
SourceLocation Loc) override {
IsVLA = !VLAIsError;
return S.Diag(Loc, VLADiag);
}
Sema::SemaDiagnosticBuilder diagnoseFold(Sema &S,
SourceLocation Loc) override {
return S.Diag(Loc, diag::ext_vla_folded_to_constant);
}
} Diagnoser(VLADiag, VLAIsError);
ExprResult R =
S.VerifyIntegerConstantExpression(ArraySize, &SizeVal, Diagnoser);
if (Diagnoser.IsVLA)
return ExprResult();
return R;
}
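// A sketch (not from the original source) of how checkArraySize classifies
// common bounds, assuming a C99 translation unit and a non-constant 'int n':
//
//   int a[10];        // ICE: returns the expression, SizeVal becomes 10
//   int b[n];         // not an ICE: diagnoseNotICE emits VLADiag, and an
//                     // empty ExprResult() signals a VLA bound
//   const int k = 8;
//   int c[k];         // not an ICE in C, but foldable: GNU modes accept it
//                     // with ext_vla_folded_to_constant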
/// Build an array type.
///
/// \param T The type of each element in the array.
///
/// \param ASM C99 array size modifier (e.g., '*', 'static').
///
/// \param ArraySize Expression describing the size of the array.
///
/// \param Brackets The range from the opening '[' to the closing ']'.
///
/// \param Entity The name of the entity that involves the array
/// type, if known.
///
/// \returns A suitable array type, if there are no errors. Otherwise,
/// returns a NULL type.
QualType Sema::BuildArrayType(QualType T, ArrayType::ArraySizeModifier ASM,
Expr *ArraySize, unsigned Quals,
SourceRange Brackets, DeclarationName Entity) {
SourceLocation Loc = Brackets.getBegin();
if (getLangOpts().CPlusPlus) {
// C++ [dcl.array]p1:
// T is called the array element type; this type shall not be a reference
// type, the (possibly cv-qualified) type void, a function type or an
// abstract class type.
//
// C++ [dcl.array]p3:
// When several "array of" specifications are adjacent, [...] only the
// first of the constant expressions that specify the bounds of the arrays
// may be omitted.
//
// Note: function types are handled in the common path with C.
if (T->isReferenceType()) {
Diag(Loc, diag::err_illegal_decl_array_of_references)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (T->isVoidType() || T->isIncompleteArrayType()) {
Diag(Loc, diag::err_array_incomplete_or_sizeless_type) << 0 << T;
return QualType();
}
if (RequireNonAbstractType(Brackets.getBegin(), T,
diag::err_array_of_abstract_type))
return QualType();
// Mentioning a member pointer type for an array type causes us to lock in
// an inheritance model, even if it's inside an unused typedef.
if (Context.getTargetInfo().getCXXABI().isMicrosoft())
if (const MemberPointerType *MPTy = T->getAs<MemberPointerType>())
if (!MPTy->getClass()->isDependentType())
(void)isCompleteType(Loc, T);
} else {
// C99 6.7.5.2p1: If the element type is an incomplete or function type,
// reject it (e.g. void ary[7], struct foo ary[7], void ary[7]())
if (RequireCompleteSizedType(Loc, T,
diag::err_array_incomplete_or_sizeless_type))
return QualType();
}
if (T->isSizelessType()) {
Diag(Loc, diag::err_array_incomplete_or_sizeless_type) << 1 << T;
return QualType();
}
if (T->isFunctionType()) {
Diag(Loc, diag::err_illegal_decl_array_of_functions)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (const RecordType *EltTy = T->getAs<RecordType>()) {
// If the element type is a struct or union that contains a flexible array
// member, accept it as a GNU extension: C99 6.7.2.1p2.
if (EltTy->getDecl()->hasFlexibleArrayMember())
Diag(Loc, diag::ext_flexible_array_in_array) << T;
} else if (T->isObjCObjectType()) {
Diag(Loc, diag::err_objc_array_of_interfaces) << T;
return QualType();
}
// Do placeholder conversions on the array size expression.
if (ArraySize && ArraySize->hasPlaceholderType()) {
ExprResult Result = CheckPlaceholderExpr(ArraySize);
if (Result.isInvalid()) return QualType();
ArraySize = Result.get();
}
// Do lvalue-to-rvalue conversions on the array size expression.
if (ArraySize && !ArraySize->isPRValue()) {
ExprResult Result = DefaultLvalueConversion(ArraySize);
if (Result.isInvalid())
return QualType();
ArraySize = Result.get();
}
// C99 6.7.5.2p1: The size expression shall have integer type.
// C++11 allows contextual conversions to such types.
if (!getLangOpts().CPlusPlus11 &&
ArraySize && !ArraySize->isTypeDependent() &&
!ArraySize->getType()->isIntegralOrUnscopedEnumerationType()) {
Diag(ArraySize->getBeginLoc(), diag::err_array_size_non_int)
<< ArraySize->getType() << ArraySize->getSourceRange();
return QualType();
}
// VLAs always produce at least a -Wvla diagnostic, sometimes an error.
unsigned VLADiag;
bool VLAIsError;
if (getLangOpts().OpenCL) {
// OpenCL v1.2 s6.9.d: variable length arrays are not supported.
VLADiag = diag::err_opencl_vla;
VLAIsError = true;
} else if (getLangOpts().C99) {
VLADiag = diag::warn_vla_used;
VLAIsError = false;
} else if (isSFINAEContext()) {
VLADiag = diag::err_vla_in_sfinae;
VLAIsError = true;
} else if (getLangOpts().OpenMP && isInOpenMPTaskUntiedContext()) {
VLADiag = diag::err_openmp_vla_in_task_untied;
VLAIsError = true;
} else {
VLADiag = diag::ext_vla;
VLAIsError = false;
}
llvm::APSInt ConstVal(Context.getTypeSize(Context.getSizeType()));
if (!ArraySize) {
if (ASM == ArrayType::Star) {
Diag(Loc, VLADiag);
if (VLAIsError)
return QualType();
T = Context.getVariableArrayType(T, nullptr, ASM, Quals, Brackets);
} else {
T = Context.getIncompleteArrayType(T, ASM, Quals);
}
} else if (ArraySize->isTypeDependent() || ArraySize->isValueDependent()) {
T = Context.getDependentSizedArrayType(T, ArraySize, ASM, Quals, Brackets);
} else {
ExprResult R =
checkArraySize(*this, ArraySize, ConstVal, VLADiag, VLAIsError);
if (R.isInvalid())
return QualType();
if (!R.isUsable()) {
// C99: an array with a non-ICE size is a VLA. We accept any expression
// that we can fold to a non-zero positive value as a non-VLA as an
// extension.
T = Context.getVariableArrayType(T, ArraySize, ASM, Quals, Brackets);
} else if (!T->isDependentType() && !T->isIncompleteType() &&
!T->isConstantSizeType()) {
// C99: an array with an element type that has a non-constant-size is a
// VLA.
// FIXME: Add a note to explain why this isn't a VLA.
Diag(Loc, VLADiag);
if (VLAIsError)
return QualType();
T = Context.getVariableArrayType(T, ArraySize, ASM, Quals, Brackets);
} else {
// C99 6.7.5.2p1: If the expression is a constant expression, it shall
// have a value greater than zero.
// In C++, this follows from narrowing conversions being disallowed.
if (ConstVal.isSigned() && ConstVal.isNegative()) {
if (Entity)
Diag(ArraySize->getBeginLoc(), diag::err_decl_negative_array_size)
<< getPrintableNameForEntity(Entity)
<< ArraySize->getSourceRange();
else
Diag(ArraySize->getBeginLoc(),
diag::err_typecheck_negative_array_size)
<< ArraySize->getSourceRange();
return QualType();
}
if (ConstVal == 0) {
// GCC accepts zero-sized static arrays. We allow them when
// we're not in a SFINAE context.
Diag(ArraySize->getBeginLoc(),
isSFINAEContext() ? diag::err_typecheck_zero_array_size
: diag::ext_typecheck_zero_array_size)
<< 0 << ArraySize->getSourceRange();
}
// Is the array too large?
unsigned ActiveSizeBits =
(!T->isDependentType() && !T->isVariablyModifiedType() &&
!T->isIncompleteType() && !T->isUndeducedType())
? ConstantArrayType::getNumAddressingBits(Context, T, ConstVal)
: ConstVal.getActiveBits();
if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) {
Diag(ArraySize->getBeginLoc(), diag::err_array_too_large)
<< toString(ConstVal, 10) << ArraySize->getSourceRange();
return QualType();
}
T = Context.getConstantArrayType(T, ConstVal, ArraySize, ASM, Quals);
}
}
if (T->isVariableArrayType() && !Context.getTargetInfo().isVLASupported()) {
// CUDA device code and some other targets don't support VLAs.
targetDiag(Loc, (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
? diag::err_cuda_vla
: diag::err_vla_unsupported)
<< ((getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
? CurrentCUDATarget()
: CFT_InvalidTarget);
}
// If this is not C99, diagnose array size modifiers on non-VLAs.
if (!getLangOpts().C99 && !T->isVariableArrayType() &&
(ASM != ArrayType::Normal || Quals != 0)) {
Diag(Loc, getLangOpts().CPlusPlus ? diag::err_c99_array_usage_cxx
: diag::ext_c99_array_usage)
<< ASM;
}
// OpenCL v2.0 s6.12.5 - Arrays of blocks are not supported.
// OpenCL v2.0 s6.16.13.1 - Arrays of pipe type are not supported.
// OpenCL v2.0 s6.9.b - Arrays of image/sampler type are not supported.
if (getLangOpts().OpenCL) {
const QualType ArrType = Context.getBaseElementType(T);
if (ArrType->isBlockPointerType() || ArrType->isPipeType() ||
ArrType->isSamplerT() || ArrType->isImageType()) {
Diag(Loc, diag::err_opencl_invalid_type_array) << ArrType;
return QualType();
}
}
return T;
}
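// Illustrative declarations (not part of the original source) exercising
// the checks in BuildArrayType:
//
//   int &r[4];    // error (C++): err_illegal_decl_array_of_references
//   void v[4];    // error: array of an incomplete type
//   int f[4]();   // error: err_illegal_decl_array_of_functions
//   int z[0];     // GNU extension: ext_typecheck_zero_array_size
//   int neg[-1];  // error: negative array size
//   int vla[n];   // VLA (n non-constant): warn_vla_used in C99, ext_vla
//                 // in C++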
QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr,
SourceLocation AttrLoc) {
// The base type must be integer (not Boolean or enumeration) or float, and
// can't already be a vector.
if ((!CurType->isDependentType() &&
(!CurType->isBuiltinType() || CurType->isBooleanType() ||
(!CurType->isIntegerType() && !CurType->isRealFloatingType()))) ||
CurType->isArrayType()) {
Diag(AttrLoc, diag::err_attribute_invalid_vector_type) << CurType;
return QualType();
}
if (SizeExpr->isTypeDependent() || SizeExpr->isValueDependent())
return Context.getDependentVectorType(CurType, SizeExpr, AttrLoc,
VectorType::GenericVector);
Optional<llvm::APSInt> VecSize = SizeExpr->getIntegerConstantExpr(Context);
if (!VecSize) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "vector_size" << AANT_ArgumentIntegerConstant
<< SizeExpr->getSourceRange();
return QualType();
}
if (CurType->isDependentType())
return Context.getDependentVectorType(CurType, SizeExpr, AttrLoc,
VectorType::GenericVector);
// VecSize is specified in bytes; convert to bits.
if (!VecSize->isIntN(61)) {
// The bit size would overflow a uint64_t.
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< SizeExpr->getSourceRange() << "vector";
return QualType();
}
uint64_t VectorSizeBits = VecSize->getZExtValue() * 8;
unsigned TypeSize = static_cast<unsigned>(Context.getTypeSize(CurType));
if (VectorSizeBits == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size)
<< SizeExpr->getSourceRange() << "vector";
return QualType();
}
if (!TypeSize || VectorSizeBits % TypeSize) {
Diag(AttrLoc, diag::err_attribute_invalid_size)
<< SizeExpr->getSourceRange();
return QualType();
}
if (VectorSizeBits / TypeSize > std::numeric_limits<uint32_t>::max()) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< SizeExpr->getSourceRange() << "vector";
return QualType();
}
return Context.getVectorType(CurType, VectorSizeBits / TypeSize,
VectorType::GenericVector);
}
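// An illustrative use (not from the original source) of the GCC-style
// vector_size attribute handled above; its argument is a size in bytes.
// Assuming a target with a 4-byte int:
//
//   typedef int v4si __attribute__((vector_size(16)));  // 16 bytes of
//                                        // 4-byte ints -> 4 elements
//   typedef int bad __attribute__((vector_size(6)));    // error: 48 bits
//                                        // is not a multiple of the
//                                        // 32-bit element size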
/// Build an ext-vector type.
///
/// Run the required checks for the extended vector type.
QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize,
SourceLocation AttrLoc) {
// Unlike gcc's vector_size attribute, we do not allow vectors to be defined
// in conjunction with complex types (pointers, arrays, functions, etc.).
//
// Additionally, OpenCL prohibits vectors of booleans (they're considered a
// reserved data type under OpenCL v2.0 s6.1.4); we don't support selects
// on bitvectors, and we have no well-defined ABI for bitvectors, so vectors
// of bool aren't allowed.
//
// We explicitly allow bool elements in ext_vector_type for C/C++.
bool IsNoBoolVecLang = getLangOpts().OpenCL || getLangOpts().OpenCLCPlusPlus;
if ((!T->isDependentType() && !T->isIntegerType() &&
!T->isRealFloatingType()) ||
(IsNoBoolVecLang && T->isBooleanType())) {
Diag(AttrLoc, diag::err_attribute_invalid_vector_type) << T;
return QualType();
}
if (!ArraySize->isTypeDependent() && !ArraySize->isValueDependent()) {
Optional<llvm::APSInt> vecSize = ArraySize->getIntegerConstantExpr(Context);
if (!vecSize) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "ext_vector_type" << AANT_ArgumentIntegerConstant
<< ArraySize->getSourceRange();
return QualType();
}
if (!vecSize->isIntN(32)) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< ArraySize->getSourceRange() << "vector";
return QualType();
}
// Unlike gcc's vector_size attribute, the size is specified as the
// number of elements, not the number of bytes.
unsigned vectorSize = static_cast<unsigned>(vecSize->getZExtValue());
if (vectorSize == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size)
<< ArraySize->getSourceRange() << "vector";
return QualType();
}
return Context.getExtVectorType(T, vectorSize);
}
return Context.getDependentSizedExtVectorType(T, ArraySize, AttrLoc);
}
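// Illustrative ext_vector_type usage (not part of the original source); the
// attribute argument is an element count, not a byte size:
//
//   typedef float float4 __attribute__((ext_vector_type(4)));  // 4 floats
//   typedef bool  bool8  __attribute__((ext_vector_type(8)));  // OK in C
//                                     // and C++, rejected in OpenCL modes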
QualType Sema::BuildMatrixType(QualType ElementTy, Expr *NumRows, Expr *NumCols,
SourceLocation AttrLoc) {
assert(Context.getLangOpts().MatrixTypes &&
"Should never build a matrix type when it is disabled");
// Check element type, if it is not dependent.
if (!ElementTy->isDependentType() &&
!MatrixType::isValidElementType(ElementTy)) {
Diag(AttrLoc, diag::err_attribute_invalid_matrix_type) << ElementTy;
return QualType();
}
if (NumRows->isTypeDependent() || NumCols->isTypeDependent() ||
NumRows->isValueDependent() || NumCols->isValueDependent())
return Context.getDependentSizedMatrixType(ElementTy, NumRows, NumCols,
AttrLoc);
Optional<llvm::APSInt> ValueRows = NumRows->getIntegerConstantExpr(Context);
Optional<llvm::APSInt> ValueColumns =
NumCols->getIntegerConstantExpr(Context);
auto const RowRange = NumRows->getSourceRange();
auto const ColRange = NumCols->getSourceRange();
// Both the row and column expressions are invalid.
if (!ValueRows && !ValueColumns) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "matrix_type" << AANT_ArgumentIntegerConstant << RowRange
<< ColRange;
return QualType();
}
// Only the row expression is invalid.
if (!ValueRows) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "matrix_type" << AANT_ArgumentIntegerConstant << RowRange;
return QualType();
}
// Only the column expression is invalid.
if (!ValueColumns) {
Diag(AttrLoc, diag::err_attribute_argument_type)
<< "matrix_type" << AANT_ArgumentIntegerConstant << ColRange;
return QualType();
}
// Check the matrix dimensions.
unsigned MatrixRows = static_cast<unsigned>(ValueRows->getZExtValue());
unsigned MatrixColumns = static_cast<unsigned>(ValueColumns->getZExtValue());
if (MatrixRows == 0 && MatrixColumns == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size)
<< "matrix" << RowRange << ColRange;
return QualType();
}
if (MatrixRows == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size) << "matrix" << RowRange;
return QualType();
}
if (MatrixColumns == 0) {
Diag(AttrLoc, diag::err_attribute_zero_size) << "matrix" << ColRange;
return QualType();
}
if (!ConstantMatrixType::isDimensionValid(MatrixRows)) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< RowRange << "matrix row";
return QualType();
}
if (!ConstantMatrixType::isDimensionValid(MatrixColumns)) {
Diag(AttrLoc, diag::err_attribute_size_too_large)
<< ColRange << "matrix column";
return QualType();
}
return Context.getConstantMatrixType(ElementTy, MatrixRows, MatrixColumns);
}
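// Illustrative matrix_type usage (not from the original source), assuming
// matrix types are enabled (e.g. with -fenable-matrix):
//
//   typedef float m4x4_t __attribute__((matrix_type(4, 4)));  // 4x4 floats
//   typedef float bad_t __attribute__((matrix_type(0, 4)));   // error:
//                                            // err_attribute_zero_size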
bool Sema::CheckFunctionReturnType(QualType T, SourceLocation Loc) {
if (T->isArrayType() || T->isFunctionType()) {
Diag(Loc, diag::err_func_returning_array_function)
<< T->isFunctionType() << T;
return true;
}
// Functions cannot return half FP.
if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1 <<
FixItHint::CreateInsertion(Loc, "*");
return true;
}
// Methods cannot return interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
Diag(Loc, diag::err_object_cannot_be_passed_returned_by_value)
<< 0 << T << FixItHint::CreateInsertion(Loc, "*");
return true;
}
if (T.hasNonTrivialToPrimitiveDestructCUnion() ||
T.hasNonTrivialToPrimitiveCopyCUnion())
checkNonTrivialCUnion(T, Loc, NTCUC_FunctionReturn,
NTCUK_Destruct|NTCUK_Copy);
// C++2a [dcl.fct]p12:
// A volatile-qualified return type is deprecated
if (T.isVolatileQualified() && getLangOpts().CPlusPlus20)
Diag(Loc, diag::warn_deprecated_volatile_return) << T;
return false;
}
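// Examples (illustrative, not in the original source) of return types this
// check rejects or warns about:
//
//   int f()[4];        // error: function returning an array
//   int g()();         // error: function returning a function
//   volatile int h();  // C++20: warn_deprecated_volatile_return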
/// Check the extended parameter information. Most of the necessary
/// checking should occur when applying the parameter attribute; the
/// only other checks required are positional restrictions.
static void checkExtParameterInfos(Sema &S, ArrayRef<QualType> paramTypes,
const FunctionProtoType::ExtProtoInfo &EPI,
llvm::function_ref<SourceLocation(unsigned)> getParamLoc) {
assert(EPI.ExtParameterInfos && "shouldn't get here without param infos");
bool emittedError = false;
auto actualCC = EPI.ExtInfo.getCC();
enum class RequiredCC { OnlySwift, SwiftOrSwiftAsync };
auto checkCompatible = [&](unsigned paramIndex, RequiredCC required) {
bool isCompatible =
(required == RequiredCC::OnlySwift)
? (actualCC == CC_Swift)
: (actualCC == CC_Swift || actualCC == CC_SwiftAsync);
if (isCompatible || emittedError)
return;
S.Diag(getParamLoc(paramIndex), diag::err_swift_param_attr_not_swiftcall)
<< getParameterABISpelling(EPI.ExtParameterInfos[paramIndex].getABI())
<< (required == RequiredCC::OnlySwift);
emittedError = true;
};
for (size_t paramIndex = 0, numParams = paramTypes.size();
paramIndex != numParams; ++paramIndex) {
switch (EPI.ExtParameterInfos[paramIndex].getABI()) {
// Nothing interesting to check for ordinary-ABI parameters.
case ParameterABI::Ordinary:
continue;
// swift_indirect_result parameters must be a prefix of the function
// arguments.
case ParameterABI::SwiftIndirectResult:
checkCompatible(paramIndex, RequiredCC::SwiftOrSwiftAsync);
if (paramIndex != 0 &&
EPI.ExtParameterInfos[paramIndex - 1].getABI()
!= ParameterABI::SwiftIndirectResult) {
S.Diag(getParamLoc(paramIndex),
diag::err_swift_indirect_result_not_first);
}
continue;
case ParameterABI::SwiftContext:
checkCompatible(paramIndex, RequiredCC::SwiftOrSwiftAsync);
continue;
// SwiftAsyncContext is not limited to swiftasynccall functions.
case ParameterABI::SwiftAsyncContext:
continue;
// swift_error parameters must be preceded by a swift_context parameter.
case ParameterABI::SwiftErrorResult:
checkCompatible(paramIndex, RequiredCC::OnlySwift);
if (paramIndex == 0 ||
EPI.ExtParameterInfos[paramIndex - 1].getABI() !=
ParameterABI::SwiftContext) {
S.Diag(getParamLoc(paramIndex),
diag::err_swift_error_result_not_after_swift_context);
}
continue;
}
llvm_unreachable("bad ABI kind");
}
}
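// A sketch (not from the original source) of the positional rules enforced
// above, written with clang's Swift parameter-ABI attributes:
//
//   __attribute__((swiftcall)) void ok(
//       __attribute__((swift_context)) void *ctx,
//       __attribute__((swift_error_result)) void **err);
//   // OK: the error result immediately follows a swift_context parameter.
//
//   __attribute__((swiftcall)) void bad(
//       int x,
//       __attribute__((swift_error_result)) void **err);
//   // error: err_swift_error_result_not_after_swift_context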
QualType Sema::BuildFunctionType(QualType T,
MutableArrayRef<QualType> ParamTypes,
SourceLocation Loc, DeclarationName Entity,
const FunctionProtoType::ExtProtoInfo &EPI) {
bool Invalid = false;
Invalid |= CheckFunctionReturnType(T, Loc);
for (unsigned Idx = 0, Cnt = ParamTypes.size(); Idx < Cnt; ++Idx) {
// FIXME: Loc is too imprecise here; we should use proper locations for args.
QualType ParamType = Context.getAdjustedParameterType(ParamTypes[Idx]);
if (ParamType->isVoidType()) {
Diag(Loc, diag::err_param_with_void_type);
Invalid = true;
} else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
// Disallow half FP arguments.
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 <<
FixItHint::CreateInsertion(Loc, "*");
Invalid = true;
}
// C++2a [dcl.fct]p4:
// A parameter with volatile-qualified type is deprecated
if (ParamType.isVolatileQualified() && getLangOpts().CPlusPlus20)
Diag(Loc, diag::warn_deprecated_volatile_param) << ParamType;
ParamTypes[Idx] = ParamType;
}
if (EPI.ExtParameterInfos) {
checkExtParameterInfos(*this, ParamTypes, EPI,
[=](unsigned i) { return Loc; });
}
if (EPI.ExtInfo.getProducesResult()) {
// This is just a warning, so we can't fail to build if we see it.
checkNSReturnsRetainedReturnType(Loc, T);
}
if (Invalid)
return QualType();
return Context.getFunctionType(T, ParamTypes, EPI);
}
/// Build a member pointer type \c T Class::*.
///
/// \param T the type to which the member pointer refers.
/// \param Class the class type into which the member pointer points.
/// \param Loc the location where this type begins
/// \param Entity the name of the entity that will have this member pointer type
///
/// \returns a member pointer type, if successful, or a NULL type if there was
/// an error.
QualType Sema::BuildMemberPointerType(QualType T, QualType Class,
SourceLocation Loc,
DeclarationName Entity) {
// Verify that we're not building a pointer to pointer to function with
// exception specification.
if (CheckDistantExceptionSpec(T)) {
Diag(Loc, diag::err_distant_exception_spec);
return QualType();
}
// C++ 8.3.3p3: A pointer to member shall not point to ... a member
// with reference type, or "cv void."
if (T->isReferenceType()) {
Diag(Loc, diag::err_illegal_decl_mempointer_to_reference)
<< getPrintableNameForEntity(Entity) << T;
return QualType();
}
if (T->isVoidType()) {
Diag(Loc, diag::err_illegal_decl_mempointer_to_void)
<< getPrintableNameForEntity(Entity);
return QualType();
}
if (!Class->isDependentType() && !Class->isRecordType()) {
Diag(Loc, diag::err_mempointer_in_nonclass_type) << Class;
return QualType();
}
if (T->isFunctionType() && getLangOpts().OpenCL &&
!getOpenCLOptions().isAvailableOption("__cl_clang_function_pointers",
getLangOpts())) {
Diag(Loc, diag::err_opencl_function_pointer) << /*pointer*/ 0;
return QualType();
}
if (getLangOpts().HLSL) {
Diag(Loc, diag::err_hlsl_pointers_unsupported) << 0;
return QualType();
}
// Adjust the default free function calling convention to the default method
// calling convention.
bool IsCtorOrDtor =
(Entity.getNameKind() == DeclarationName::CXXConstructorName) ||
(Entity.getNameKind() == DeclarationName::CXXDestructorName);
if (T->isFunctionType())
adjustMemberFunctionCC(T, /*IsStatic=*/false, IsCtorOrDtor, Loc);
return Context.getMemberPointerType(T, Class.getTypePtr());
}
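// Illustrative member pointer declarations (not part of the original source):
//
//   struct S { int m; void f(); };
//   int S::*pm = &S::m;       // OK: pointer to data member
//   void (S::*pf)() = &S::f;  // OK: pointer to member function
//   int &S::*pr;              // error: member pointer to a reference
//   void S::*pv;              // error: member pointer to cv void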
/// Build a block pointer type.
///
/// \param T The type to which we'll be building a block pointer.
///
/// \param Loc The source location, used for diagnostics.
///
/// \param Entity The name of the entity that involves the block pointer
/// type, if known.
///
/// \returns A suitable block pointer type, if there are no
/// errors. Otherwise, returns a NULL type.
QualType Sema::BuildBlockPointerType(QualType T,
SourceLocation Loc,
DeclarationName Entity) {
if (!T->isFunctionType()) {
Diag(Loc, diag::err_nonfunction_block_type);
return QualType();
}
if (checkQualifiedFunction(*this, T, Loc, QFK_BlockPointer))
return QualType();
if (getLangOpts().OpenCL)
T = deduceOpenCLPointeeAddrSpace(*this, T);
return Context.getBlockPointerType(T);
}
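// Illustrative block pointer declarations (not part of the original source,
// assuming blocks are enabled with -fblocks):
//
//   void (^done)(int status);  // OK: block pointer to a function type
//   int ^bad;                  // error: err_nonfunction_block_type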
QualType Sema::GetTypeFromParser(ParsedType Ty, TypeSourceInfo **TInfo) {
QualType QT = Ty.get();
if (QT.isNull()) {
if (TInfo) *TInfo = nullptr;
return QualType();
}
TypeSourceInfo *DI = nullptr;
if (const LocInfoType *LIT = dyn_cast<LocInfoType>(QT)) {
QT = LIT->getType();
DI = LIT->getTypeSourceInfo();
}
if (TInfo) *TInfo = DI;
return QT;
}
static void transferARCOwnershipToDeclaratorChunk(TypeProcessingState &state,
Qualifiers::ObjCLifetime ownership,
unsigned chunkIndex);
/// Given that this is the declaration of a parameter under ARC,
/// attempt to infer attributes and such for pointer-to-whatever
/// types.
static void inferARCWriteback(TypeProcessingState &state,
QualType &declSpecType) {
Sema &S = state.getSema();
Declarator &declarator = state.getDeclarator();
// TODO: should we care about decl qualifiers?
// Check whether the declarator has the expected form. We walk
// from the inside out in order to make the block logic work.
unsigned outermostPointerIndex = 0;
bool isBlockPointer = false;
unsigned numPointers = 0;
for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) {
unsigned chunkIndex = i;
DeclaratorChunk &chunk = declarator.getTypeObject(chunkIndex);
switch (chunk.Kind) {
case DeclaratorChunk::Paren:
// Ignore parens.
break;
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pointer:
// Count the number of pointers. Treat references
// interchangeably as pointers; if they're mis-ordered, normal
// type building will discover that.
outermostPointerIndex = chunkIndex;
numPointers++;
break;
case DeclaratorChunk::BlockPointer:
// If we have a pointer to block pointer, that's an acceptable
// indirect reference; anything else is not an application of
// the rules.
if (numPointers != 1) return;
numPointers++;
outermostPointerIndex = chunkIndex;
isBlockPointer = true;
// We don't care about pointer structure in return values here.
goto done;
case DeclaratorChunk::Array: // suppress if written (id[])?
case DeclaratorChunk::Function:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
return;
}
}
done:
// If we have *one* pointer, then we want to throw the qualifier on
// the declaration-specifiers, which means that it needs to be a
// retainable object type.
if (numPointers == 1) {
// If it's not a retainable object type, the rule doesn't apply.
if (!declSpecType->isObjCRetainableType()) return;
// If it already has lifetime, don't do anything.
if (declSpecType.getObjCLifetime()) return;
// Otherwise, modify the type in-place.
Qualifiers qs;
if (declSpecType->isObjCARCImplicitlyUnretainedType())
qs.addObjCLifetime(Qualifiers::OCL_ExplicitNone);
else
qs.addObjCLifetime(Qualifiers::OCL_Autoreleasing);
declSpecType = S.Context.getQualifiedType(declSpecType, qs);
// If we have *two* pointers, then we want to throw the qualifier on
// the outermost pointer.
} else if (numPointers == 2) {
// If we don't have a block pointer, we need to check whether the
// declaration-specifiers gave us something that will turn into a
// retainable object pointer after we slap the first pointer on it.
if (!isBlockPointer && !declSpecType->isObjCObjectType())
return;
// Look for an explicit lifetime attribute there.
DeclaratorChunk &chunk = declarator.getTypeObject(outermostPointerIndex);
if (chunk.Kind != DeclaratorChunk::Pointer &&
chunk.Kind != DeclaratorChunk::BlockPointer)
return;
for (const ParsedAttr &AL : chunk.getAttrs())
if (AL.getKind() == ParsedAttr::AT_ObjCOwnership)
return;
transferARCOwnershipToDeclaratorChunk(state, Qualifiers::OCL_Autoreleasing,
outermostPointerIndex);
// Any other number of pointers/references does not trigger the rule.
} else return;
// TODO: mark whether we did this inference?
}
void Sema::diagnoseIgnoredQualifiers(unsigned DiagID, unsigned Quals,
SourceLocation FallbackLoc,
SourceLocation ConstQualLoc,
SourceLocation VolatileQualLoc,
SourceLocation RestrictQualLoc,
SourceLocation AtomicQualLoc,
SourceLocation UnalignedQualLoc) {
if (!Quals)
return;
struct Qual {
const char *Name;
unsigned Mask;
SourceLocation Loc;
} const QualKinds[5] = {
{ "const", DeclSpec::TQ_const, ConstQualLoc },
{ "volatile", DeclSpec::TQ_volatile, VolatileQualLoc },
{ "restrict", DeclSpec::TQ_restrict, RestrictQualLoc },
{ "__unaligned", DeclSpec::TQ_unaligned, UnalignedQualLoc },
{ "_Atomic", DeclSpec::TQ_atomic, AtomicQualLoc }
};
SmallString<32> QualStr;
unsigned NumQuals = 0;
SourceLocation Loc;
FixItHint FixIts[5];
// Build a string naming the redundant qualifiers.
for (auto &E : QualKinds) {
if (Quals & E.Mask) {
if (!QualStr.empty()) QualStr += ' ';
QualStr += E.Name;
// If we have a location for the qualifier, offer a fixit.
SourceLocation QualLoc = E.Loc;
if (QualLoc.isValid()) {
FixIts[NumQuals] = FixItHint::CreateRemoval(QualLoc);
if (Loc.isInvalid() ||
getSourceManager().isBeforeInTranslationUnit(QualLoc, Loc))
Loc = QualLoc;
}
++NumQuals;
}
}
Diag(Loc.isInvalid() ? FallbackLoc : Loc, DiagID)
<< QualStr << NumQuals << FixIts[0] << FixIts[1] << FixIts[2] << FixIts[3];
}
// Diagnose pointless type qualifiers on the return type of a function.
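// Illustrative sketch (hypothetical declaration): in 'const int f();' the
// 'const' on the return type has no effect, so it is reported via
// warn_qual_return_type, with a removal fix-it when a source location for the
// qualifier is available.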
static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy,
Declarator &D,
unsigned FunctionChunkIndex) {
const DeclaratorChunk::FunctionTypeInfo &FTI =
D.getTypeObject(FunctionChunkIndex).Fun;
if (FTI.hasTrailingReturnType()) {
S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
RetTy.getLocalCVRQualifiers(),
FTI.getTrailingReturnTypeLoc());
return;
}
for (unsigned OuterChunkIndex = FunctionChunkIndex + 1,
End = D.getNumTypeObjects();
OuterChunkIndex != End; ++OuterChunkIndex) {
DeclaratorChunk &OuterChunk = D.getTypeObject(OuterChunkIndex);
switch (OuterChunk.Kind) {
case DeclaratorChunk::Paren:
continue;
case DeclaratorChunk::Pointer: {
DeclaratorChunk::PointerTypeInfo &PTI = OuterChunk.Ptr;
S.diagnoseIgnoredQualifiers(
diag::warn_qual_return_type,
PTI.TypeQuals,
SourceLocation(),
PTI.ConstQualLoc,
PTI.VolatileQualLoc,
PTI.RestrictQualLoc,
PTI.AtomicQualLoc,
PTI.UnalignedQualLoc);
return;
}
case DeclaratorChunk::Function:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::Array:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
// FIXME: We can't currently provide an accurate source location and a
// fix-it hint for these.
unsigned AtomicQual = RetTy->isAtomicType() ? DeclSpec::TQ_atomic : 0;
S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
RetTy.getCVRQualifiers() | AtomicQual,
D.getIdentifierLoc());
return;
}
llvm_unreachable("unknown declarator chunk kind");
}
// If the qualifiers come from a conversion function type, don't diagnose
// them -- they're not necessarily redundant, since such a conversion
// operator can be explicitly called as "x.operator const int()".
if (D.getName().getKind() == UnqualifiedIdKind::IK_ConversionFunctionId)
return;
// Just parens all the way out to the decl specifiers. Diagnose any qualifiers
// which are present there.
S.diagnoseIgnoredQualifiers(diag::warn_qual_return_type,
D.getDeclSpec().getTypeQualifiers(),
D.getIdentifierLoc(),
D.getDeclSpec().getConstSpecLoc(),
D.getDeclSpec().getVolatileSpecLoc(),
D.getDeclSpec().getRestrictSpecLoc(),
D.getDeclSpec().getAtomicSpecLoc(),
D.getDeclSpec().getUnalignedSpecLoc());
}
static std::pair<QualType, TypeSourceInfo *>
InventTemplateParameter(TypeProcessingState &state, QualType T,
TypeSourceInfo *TrailingTSI, AutoType *Auto,
InventedTemplateParameterInfo &Info) {
Sema &S = state.getSema();
Declarator &D = state.getDeclarator();
const unsigned TemplateParameterDepth = Info.AutoTemplateParameterDepth;
const unsigned AutoParameterPosition = Info.TemplateParams.size();
const bool IsParameterPack = D.hasEllipsis();
// If auto is mentioned in a lambda parameter or abbreviated function
// template context, convert it to a template parameter type.
// Create the TemplateTypeParmDecl here to retrieve the corresponding
// template parameter type. Template parameters are temporarily added
// to the TU until the associated TemplateDecl is created.
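// Illustrative sketch (hypothetical declarations): in C++20, an abbreviated
// function template such as 'void f(auto x);' is handled here roughly as if
// it had been written 'template <typename T> void f(T x);'; a constrained
// form like 'void f(std::integral auto x);' additionally attaches the type
// constraint to the invented parameter below.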
TemplateTypeParmDecl *InventedTemplateParam =
TemplateTypeParmDecl::Create(
S.Context, S.Context.getTranslationUnitDecl(),
/*KeyLoc=*/D.getDeclSpec().getTypeSpecTypeLoc(),
/*NameLoc=*/D.getIdentifierLoc(),
TemplateParameterDepth, AutoParameterPosition,
S.InventAbbreviatedTemplateParameterTypeName(
D.getIdentifier(), AutoParameterPosition), false,
IsParameterPack, /*HasTypeConstraint=*/Auto->isConstrained());
InventedTemplateParam->setImplicit();
Info.TemplateParams.push_back(InventedTemplateParam);
// Attach type constraints to the new parameter.
if (Auto->isConstrained()) {
if (TrailingTSI) {
// The 'auto' appears in a trailing return type we've already built;
// extract its type constraints to attach to the template parameter.
AutoTypeLoc AutoLoc = TrailingTSI->getTypeLoc().getContainedAutoTypeLoc();
TemplateArgumentListInfo TAL(AutoLoc.getLAngleLoc(), AutoLoc.getRAngleLoc());
bool Invalid = false;
for (unsigned Idx = 0; Idx < AutoLoc.getNumArgs(); ++Idx) {
if (D.getEllipsisLoc().isInvalid() && !Invalid &&
S.DiagnoseUnexpandedParameterPack(AutoLoc.getArgLoc(Idx),
Sema::UPPC_TypeConstraint))
Invalid = true;
TAL.addArgument(AutoLoc.getArgLoc(Idx));
}
if (!Invalid) {
S.AttachTypeConstraint(
AutoLoc.getNestedNameSpecifierLoc(), AutoLoc.getConceptNameInfo(),
AutoLoc.getNamedConcept(),
AutoLoc.hasExplicitTemplateArgs() ? &TAL : nullptr,
InventedTemplateParam, D.getEllipsisLoc());
}
} else {
// The 'auto' appears in the decl-specifiers; we've not finished forming
// TypeSourceInfo for it yet.
TemplateIdAnnotation *TemplateId = D.getDeclSpec().getRepAsTemplateId();
TemplateArgumentListInfo TemplateArgsInfo;
bool Invalid = false;
if (TemplateId->LAngleLoc.isValid()) {
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
S.translateTemplateArguments(TemplateArgsPtr, TemplateArgsInfo);
if (D.getEllipsisLoc().isInvalid()) {
for (TemplateArgumentLoc Arg : TemplateArgsInfo.arguments()) {
if (S.DiagnoseUnexpandedParameterPack(Arg,
Sema::UPPC_TypeConstraint)) {
Invalid = true;
break;
}
}
}
}
if (!Invalid) {
S.AttachTypeConstraint(
D.getDeclSpec().getTypeSpecScope().getWithLocInContext(S.Context),
DeclarationNameInfo(DeclarationName(TemplateId->Name),
TemplateId->TemplateNameLoc),
cast<ConceptDecl>(TemplateId->Template.get().getAsTemplateDecl()),
TemplateId->LAngleLoc.isValid() ? &TemplateArgsInfo : nullptr,
InventedTemplateParam, D.getEllipsisLoc());
}
}
}
// Replace the 'auto' in the function parameter with this invented
// template type parameter.
// FIXME: Retain some type sugar to indicate that this was written
// as 'auto'?
QualType Replacement(InventedTemplateParam->getTypeForDecl(), 0);
QualType NewT = state.ReplaceAutoType(T, Replacement);
TypeSourceInfo *NewTSI =
TrailingTSI ? S.ReplaceAutoTypeSourceInfo(TrailingTSI, Replacement)
: nullptr;
return {NewT, NewTSI};
}
static TypeSourceInfo *
GetTypeSourceInfoForDeclarator(TypeProcessingState &State,
QualType T, TypeSourceInfo *ReturnTypeInfo);
static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
TypeSourceInfo *&ReturnTypeInfo) {
Sema &SemaRef = state.getSema();
Declarator &D = state.getDeclarator();
QualType T;
ReturnTypeInfo = nullptr;
// The TagDecl owned by the DeclSpec.
TagDecl *OwnedTagDecl = nullptr;
switch (D.getName().getKind()) {
case UnqualifiedIdKind::IK_ImplicitSelfParam:
case UnqualifiedIdKind::IK_OperatorFunctionId:
case UnqualifiedIdKind::IK_Identifier:
case UnqualifiedIdKind::IK_LiteralOperatorId:
case UnqualifiedIdKind::IK_TemplateId:
T = ConvertDeclSpecToType(state);
if (!D.isInvalidType() && D.getDeclSpec().isTypeSpecOwned()) {
OwnedTagDecl = cast<TagDecl>(D.getDeclSpec().getRepAsDecl());
// Owned declaration is embedded in declarator.
OwnedTagDecl->setEmbeddedInDeclarator(true);
}
break;
case UnqualifiedIdKind::IK_ConstructorName:
case UnqualifiedIdKind::IK_ConstructorTemplateId:
case UnqualifiedIdKind::IK_DestructorName:
// Constructors and destructors don't have return types. Use
// "void" instead.
T = SemaRef.Context.VoidTy;
processTypeAttrs(state, T, TAL_DeclSpec,
D.getMutableDeclSpec().getAttributes());
break;
case UnqualifiedIdKind::IK_DeductionGuideName:
// Deduction guides have a trailing return type and no type in their
// decl-specifier sequence. Use a placeholder return type for now.
T = SemaRef.Context.DependentTy;
break;
case UnqualifiedIdKind::IK_ConversionFunctionId:
// The result type of a conversion function is the type that it
// converts to.
T = SemaRef.GetTypeFromParser(D.getName().ConversionFunctionId,
&ReturnTypeInfo);
break;
}
// Note: We don't need to distribute declaration attributes (i.e.
// D.getDeclarationAttributes()) because those are always C++11 attributes,
// and those don't get distributed.
distributeTypeAttrsFromDeclarator(state, T);
// Find the deduced type in this type. Look in the trailing return type if we
// have one, otherwise in the DeclSpec type.
// FIXME: The standard wording doesn't currently describe this.
DeducedType *Deduced = T->getContainedDeducedType();
bool DeducedIsTrailingReturnType = false;
if (Deduced && isa<AutoType>(Deduced) && D.hasTrailingReturnType()) {
QualType T = SemaRef.GetTypeFromParser(D.getTrailingReturnType());
Deduced = T.isNull() ? nullptr : T->getContainedDeducedType();
DeducedIsTrailingReturnType = true;
}
// C++11 [dcl.spec.auto]p5: reject 'auto' if it is not in an allowed context.
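// Illustrative sketch (hypothetical declarations): 'typedef auto T;' or a
// template type argument such as 'std::vector<auto> v;' end up in the
// err_auto_not_allowed diagnostic emitted further below.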
if (Deduced) {
AutoType *Auto = dyn_cast<AutoType>(Deduced);
int Error = -1;
// Is this an 'auto' or 'decltype(auto)' type (as opposed to __auto_type or
// class template argument deduction)?
bool IsCXXAutoType =
(Auto && Auto->getKeyword() != AutoTypeKeyword::GNUAutoType);
bool IsDeducedReturnType = false;
switch (D.getContext()) {
case DeclaratorContext::LambdaExpr:
// Declared return type of a lambda-declarator is implicit and is always
// 'auto'.
break;
case DeclaratorContext::ObjCParameter:
case DeclaratorContext::ObjCResult:
Error = 0;
break;
case DeclaratorContext::RequiresExpr:
Error = 22;
break;
case DeclaratorContext::Prototype:
case DeclaratorContext::LambdaExprParameter: {
InventedTemplateParameterInfo *Info = nullptr;
if (D.getContext() == DeclaratorContext::Prototype) {
// With concepts we allow 'auto' in function parameters.
if (!SemaRef.getLangOpts().CPlusPlus20 || !Auto ||
Auto->getKeyword() != AutoTypeKeyword::Auto) {
Error = 0;
break;
} else if (!SemaRef.getCurScope()->isFunctionDeclarationScope()) {
Error = 21;
break;
}
Info = &SemaRef.InventedParameterInfos.back();
} else {
// In C++14, generic lambdas allow 'auto' in their parameters.
if (!SemaRef.getLangOpts().CPlusPlus14 || !Auto ||
Auto->getKeyword() != AutoTypeKeyword::Auto) {
Error = 16;
break;
}
Info = SemaRef.getCurLambda();
assert(Info && "No LambdaScopeInfo on the stack!");
}
// We'll deal with inventing template parameters for 'auto' in trailing
// return types when we pick up the trailing return type while processing
// the function chunk.
if (!DeducedIsTrailingReturnType)
T = InventTemplateParameter(state, T, nullptr, Auto, *Info).first;
break;
}
case DeclaratorContext::Member: {
if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_static ||
D.isFunctionDeclarator())
break;
bool Cxx = SemaRef.getLangOpts().CPlusPlus;
if (isa<ObjCContainerDecl>(SemaRef.CurContext)) {
Error = 6; // Interface member.
} else {
switch (cast<TagDecl>(SemaRef.CurContext)->getTagKind()) {
case TTK_Enum: llvm_unreachable("unhandled tag kind");
case TTK_Struct: Error = Cxx ? 1 : 2; /* Struct member */ break;
case TTK_Union: Error = Cxx ? 3 : 4; /* Union member */ break;
case TTK_Class: Error = 5; /* Class member */ break;
case TTK_Interface: Error = 6; /* Interface member */ break;
}
}
if (D.getDeclSpec().isFriendSpecified())
Error = 20; // Friend type
break;
}
case DeclaratorContext::CXXCatch:
case DeclaratorContext::ObjCCatch:
Error = 7; // Exception declaration
break;
case DeclaratorContext::TemplateParam:
if (isa<DeducedTemplateSpecializationType>(Deduced) &&
!SemaRef.getLangOpts().CPlusPlus20)
Error = 19; // Template parameter (until C++20)
else if (!SemaRef.getLangOpts().CPlusPlus17)
Error = 8; // Template parameter (until C++17)
break;
case DeclaratorContext::BlockLiteral:
Error = 9; // Block literal
break;
case DeclaratorContext::TemplateArg:
// Within a template argument list, a deduced template specialization
// type will be reinterpreted as a template template argument.
if (isa<DeducedTemplateSpecializationType>(Deduced) &&
!D.getNumTypeObjects() &&
D.getDeclSpec().getParsedSpecifiers() == DeclSpec::PQ_TypeSpecifier)
break;
LLVM_FALLTHROUGH;
case DeclaratorContext::TemplateTypeArg:
Error = 10; // Template type argument
break;
case DeclaratorContext::AliasDecl:
case DeclaratorContext::AliasTemplate:
Error = 12; // Type alias
break;
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
Error = 13; // Function return type
IsDeducedReturnType = true;
break;
case DeclaratorContext::ConversionId:
if (!SemaRef.getLangOpts().CPlusPlus14 || !IsCXXAutoType)
Error = 14; // conversion-type-id
IsDeducedReturnType = true;
break;
case DeclaratorContext::FunctionalCast:
if (isa<DeducedTemplateSpecializationType>(Deduced))
break;
if (SemaRef.getLangOpts().CPlusPlus2b && IsCXXAutoType &&
!Auto->isDecltypeAuto())
break; // auto(x)
LLVM_FALLTHROUGH;
case DeclaratorContext::TypeName:
case DeclaratorContext::Association:
Error = 15; // Generic
break;
case DeclaratorContext::File:
case DeclaratorContext::Block:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::Condition:
// FIXME: P0091R3 (erroneously) does not permit class template argument
// deduction in conditions, for-init-statements, and other declarations
// that are not simple-declarations.
break;
case DeclaratorContext::CXXNew:
// FIXME: P0091R3 does not permit class template argument deduction here,
// but we follow GCC and allow it anyway.
if (!IsCXXAutoType && !isa<DeducedTemplateSpecializationType>(Deduced))
Error = 17; // 'new' type
break;
case DeclaratorContext::KNRTypeList:
Error = 18; // K&R function parameter
break;
}
if (D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef)
Error = 11;
// In Objective-C it is an error to use 'auto' on a function declarator
// (and everywhere for '__auto_type').
if (D.isFunctionDeclarator() &&
(!SemaRef.getLangOpts().CPlusPlus11 || !IsCXXAutoType))
Error = 13;
SourceRange AutoRange = D.getDeclSpec().getTypeSpecTypeLoc();
if (D.getName().getKind() == UnqualifiedIdKind::IK_ConversionFunctionId)
AutoRange = D.getName().getSourceRange();
if (Error != -1) {
unsigned Kind;
if (Auto) {
switch (Auto->getKeyword()) {
case AutoTypeKeyword::Auto: Kind = 0; break;
case AutoTypeKeyword::DecltypeAuto: Kind = 1; break;
case AutoTypeKeyword::GNUAutoType: Kind = 2; break;
}
} else {
assert(isa<DeducedTemplateSpecializationType>(Deduced) &&
"unknown auto type");
Kind = 3;
}
auto *DTST = dyn_cast<DeducedTemplateSpecializationType>(Deduced);
TemplateName TN = DTST ? DTST->getTemplateName() : TemplateName();
SemaRef.Diag(AutoRange.getBegin(), diag::err_auto_not_allowed)
<< Kind << Error << (int)SemaRef.getTemplateNameKindForDiagnostics(TN)
<< QualType(Deduced, 0) << AutoRange;
if (auto *TD = TN.getAsTemplateDecl())
SemaRef.Diag(TD->getLocation(), diag::note_template_decl_here);
T = SemaRef.Context.IntTy;
D.setInvalidType(true);
} else if (Auto && D.getContext() != DeclaratorContext::LambdaExpr) {
// If there was a trailing return type, we already got
// warn_cxx98_compat_trailing_return_type in the parser.
SemaRef.Diag(AutoRange.getBegin(),
D.getContext() == DeclaratorContext::LambdaExprParameter
? diag::warn_cxx11_compat_generic_lambda
: IsDeducedReturnType
? diag::warn_cxx11_compat_deduced_return_type
: diag::warn_cxx98_compat_auto_type_specifier)
<< AutoRange;
}
}
if (SemaRef.getLangOpts().CPlusPlus &&
OwnedTagDecl && OwnedTagDecl->isCompleteDefinition()) {
// Check the contexts where C++ forbids the declaration of a new class
// or enumeration in a type-specifier-seq.
unsigned DiagID = 0;
switch (D.getContext()) {
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
// Class and enumeration definitions are syntactically not allowed in
// trailing return types.
llvm_unreachable("parser should not have allowed this");
break;
case DeclaratorContext::File:
case DeclaratorContext::Member:
case DeclaratorContext::Block:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::BlockLiteral:
case DeclaratorContext::LambdaExpr:
// C++11 [dcl.type]p3:
// A type-specifier-seq shall not define a class or enumeration unless
// it appears in the type-id of an alias-declaration (7.1.3) that is not
// the declaration of a template-declaration.
case DeclaratorContext::AliasDecl:
break;
case DeclaratorContext::AliasTemplate:
DiagID = diag::err_type_defined_in_alias_template;
break;
case DeclaratorContext::TypeName:
case DeclaratorContext::FunctionalCast:
case DeclaratorContext::ConversionId:
case DeclaratorContext::TemplateParam:
case DeclaratorContext::CXXNew:
case DeclaratorContext::CXXCatch:
case DeclaratorContext::ObjCCatch:
case DeclaratorContext::TemplateArg:
case DeclaratorContext::TemplateTypeArg:
case DeclaratorContext::Association:
DiagID = diag::err_type_defined_in_type_specifier;
break;
case DeclaratorContext::Prototype:
case DeclaratorContext::LambdaExprParameter:
case DeclaratorContext::ObjCParameter:
case DeclaratorContext::ObjCResult:
case DeclaratorContext::KNRTypeList:
case DeclaratorContext::RequiresExpr:
// C++ [dcl.fct]p6:
// Types shall not be defined in return or parameter types.
DiagID = diag::err_type_defined_in_param_type;
break;
case DeclaratorContext::Condition:
// C++ 6.4p2:
// The type-specifier-seq shall not contain typedef and shall not declare
// a new class or enumeration.
DiagID = diag::err_type_defined_in_condition;
break;
}
if (DiagID != 0) {
SemaRef.Diag(OwnedTagDecl->getLocation(), DiagID)
<< SemaRef.Context.getTypeDeclType(OwnedTagDecl);
D.setInvalidType(true);
}
}
assert(!T.isNull() && "This function should not return a null type");
return T;
}
/// Produce an appropriate diagnostic for an ambiguity between a function
/// declarator and a C++ direct-initializer.
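/// Illustrative sketch (hypothetical declaration): at block scope,
///   T var(T());
/// declares a function taking an unnamed parameter rather than constructing a
/// variable (the classic "most vexing parse"), which is what this warning
/// points out.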
static void warnAboutAmbiguousFunction(Sema &S, Declarator &D,
DeclaratorChunk &DeclType, QualType RT) {
const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
assert(FTI.isAmbiguous && "no direct-initializer / function ambiguity");
// If the return type is void there is no ambiguity.
if (RT->isVoidType())
return;
// An initializer for a non-class type can have at most one argument.
if (!RT->isRecordType() && FTI.NumParams > 1)
return;
// An initializer for a reference must have exactly one argument.
if (RT->isReferenceType() && FTI.NumParams != 1)
return;
// Only warn if this declarator is declaring a function at block scope, and
// doesn't have a storage class (such as 'extern') specified.
if (!D.isFunctionDeclarator() ||
D.getFunctionDefinitionKind() != FunctionDefinitionKind::Declaration ||
!S.CurContext->isFunctionOrMethod() ||
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_unspecified)
return;
// Inside a condition, a direct initializer is not permitted. We allow one to
// be parsed in order to give better diagnostics in condition parsing.
if (D.getContext() == DeclaratorContext::Condition)
return;
SourceRange ParenRange(DeclType.Loc, DeclType.EndLoc);
S.Diag(DeclType.Loc,
FTI.NumParams ? diag::warn_parens_disambiguated_as_function_declaration
: diag::warn_empty_parens_are_function_decl)
<< ParenRange;
// If the declaration looks like:
// T var1,
// f();
// and name lookup finds a function named 'f', then the ',' was
// probably intended to be a ';'.
if (!D.isFirstDeclarator() && D.getIdentifier()) {
FullSourceLoc Comma(D.getCommaLoc(), S.SourceMgr);
FullSourceLoc Name(D.getIdentifierLoc(), S.SourceMgr);
if (Comma.getFileID() != Name.getFileID() ||
Comma.getSpellingLineNumber() != Name.getSpellingLineNumber()) {
LookupResult Result(S, D.getIdentifier(), SourceLocation(),
Sema::LookupOrdinaryName);
if (S.LookupName(Result, S.getCurScope()))
S.Diag(D.getCommaLoc(), diag::note_empty_parens_function_call)
<< FixItHint::CreateReplacement(D.getCommaLoc(), ";")
<< D.getIdentifier();
Result.suppressDiagnostics();
}
}
if (FTI.NumParams > 0) {
// For a declaration with parameters, e.g. "T var(T());", suggest adding
// parens around the first parameter to turn the declaration into a
// variable declaration.
SourceRange Range = FTI.Params[0].Param->getSourceRange();
SourceLocation B = Range.getBegin();
SourceLocation E = S.getLocForEndOfToken(Range.getEnd());
// FIXME: Maybe we should suggest adding braces instead of parens
// in C++11 for classes that don't have an initializer_list constructor.
S.Diag(B, diag::note_additional_parens_for_variable_declaration)
<< FixItHint::CreateInsertion(B, "(")
<< FixItHint::CreateInsertion(E, ")");
} else {
// For a declaration without parameters, e.g. "T var();", suggest replacing
// the parens with an initializer to turn the declaration into a variable
// declaration.
const CXXRecordDecl *RD = RT->getAsCXXRecordDecl();
// Empty parens mean value-initialization, and no parens mean
// default initialization. These are equivalent if the default
// constructor is user-provided or if zero-initialization is a
// no-op.
if (RD && RD->hasDefinition() &&
(RD->isEmpty() || RD->hasUserProvidedDefaultConstructor()))
S.Diag(DeclType.Loc, diag::note_empty_parens_default_ctor)
<< FixItHint::CreateRemoval(ParenRange);
else {
std::string Init =
S.getFixItZeroInitializerForType(RT, ParenRange.getBegin());
if (Init.empty() && S.LangOpts.CPlusPlus11)
Init = "{}";
if (!Init.empty())
S.Diag(DeclType.Loc, diag::note_empty_parens_zero_initialize)
<< FixItHint::CreateReplacement(ParenRange, Init);
}
}
}
/// Produce an appropriate diagnostic for a declarator with top-level
/// parentheses.
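/// Illustrative sketch (hypothetical declarations): 'int (x);' gets the plain
/// redundant-parentheses warning, whereas 'T (name);' with a class type T and
/// a previously declared 'name' may instead be diagnosed as having been
/// disambiguated as a variable declaration rather than a function-style cast.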
static void warnAboutRedundantParens(Sema &S, Declarator &D, QualType T) {
DeclaratorChunk &Paren = D.getTypeObject(D.getNumTypeObjects() - 1);
assert(Paren.Kind == DeclaratorChunk::Paren &&
"do not have redundant top-level parentheses");
// This is a syntactic check; we're not interested in cases that arise
// during template instantiation.
if (S.inTemplateInstantiation())
return;
// Check whether this could be intended to be a construction of a temporary
// object in C++ via a function-style cast.
bool CouldBeTemporaryObject =
S.getLangOpts().CPlusPlus && D.isExpressionContext() &&
!D.isInvalidType() && D.getIdentifier() &&
D.getDeclSpec().getParsedSpecifiers() == DeclSpec::PQ_TypeSpecifier &&
(T->isRecordType() || T->isDependentType()) &&
D.getDeclSpec().getTypeQualifiers() == 0 && D.isFirstDeclarator();
bool StartsWithDeclaratorId = true;
for (auto &C : D.type_objects()) {
switch (C.Kind) {
case DeclaratorChunk::Paren:
if (&C == &Paren)
continue;
LLVM_FALLTHROUGH;
case DeclaratorChunk::Pointer:
StartsWithDeclaratorId = false;
continue;
case DeclaratorChunk::Array:
if (!C.Arr.NumElts)
CouldBeTemporaryObject = false;
continue;
case DeclaratorChunk::Reference:
// FIXME: Suppress the warning here if there is no initializer; we're
// going to give an error anyway.
// We assume that something like 'T (&x) = y;' is highly likely to not
// be intended to be a temporary object.
CouldBeTemporaryObject = false;
StartsWithDeclaratorId = false;
continue;
case DeclaratorChunk::Function:
// In a new-type-id, function chunks require parentheses.
if (D.getContext() == DeclaratorContext::CXXNew)
return;
// FIXME: "A(f())" deserves a vexing-parse warning, not just a
// redundant-parens warning, but we don't know whether the function
// chunk was syntactically valid as an expression here.
CouldBeTemporaryObject = false;
continue;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
// These cannot appear in expressions.
CouldBeTemporaryObject = false;
StartsWithDeclaratorId = false;
continue;
}
}
// FIXME: If there is an initializer, assume that this is not intended to be
// a construction of a temporary object.
// Check whether the name has already been declared; if not, this is not a
// function-style cast.
if (CouldBeTemporaryObject) {
LookupResult Result(S, D.getIdentifier(), SourceLocation(),
Sema::LookupOrdinaryName);
if (!S.LookupName(Result, S.getCurScope()))
CouldBeTemporaryObject = false;
Result.suppressDiagnostics();
}
SourceRange ParenRange(Paren.Loc, Paren.EndLoc);
if (!CouldBeTemporaryObject) {
// If we have A (::B), the parentheses affect the meaning of the program.
// Suppress the warning in that case. Don't bother looking at the DeclSpec
// here: even (e.g.) "int ::x" is visually ambiguous even though it's
// formally unambiguous.
if (StartsWithDeclaratorId && D.getCXXScopeSpec().isValid()) {
for (NestedNameSpecifier *NNS = D.getCXXScopeSpec().getScopeRep(); NNS;
NNS = NNS->getPrefix()) {
if (NNS->getKind() == NestedNameSpecifier::Global)
return;
}
}
S.Diag(Paren.Loc, diag::warn_redundant_parens_around_declarator)
<< ParenRange << FixItHint::CreateRemoval(Paren.Loc)
<< FixItHint::CreateRemoval(Paren.EndLoc);
return;
}
S.Diag(Paren.Loc, diag::warn_parens_disambiguated_as_variable_declaration)
<< ParenRange << D.getIdentifier();
auto *RD = T->getAsCXXRecordDecl();
if (!RD || !RD->hasDefinition() || RD->hasNonTrivialDestructor())
S.Diag(Paren.Loc, diag::note_raii_guard_add_name)
<< FixItHint::CreateInsertion(Paren.Loc, " varname") << T
<< D.getIdentifier();
// FIXME: A cast to void is probably a better suggestion in cases where it's
// valid (when there is no initializer and we're not in a condition).
S.Diag(D.getBeginLoc(), diag::note_function_style_cast_add_parentheses)
<< FixItHint::CreateInsertion(D.getBeginLoc(), "(")
<< FixItHint::CreateInsertion(S.getLocForEndOfToken(D.getEndLoc()), ")");
S.Diag(Paren.Loc, diag::note_remove_parens_for_variable_declaration)
<< FixItHint::CreateRemoval(Paren.Loc)
<< FixItHint::CreateRemoval(Paren.EndLoc);
}
/// Helper for figuring out the default CC for a function declarator type. If
/// this is the outermost chunk, then we can determine the CC from the
/// declarator context. If not, then this could be either a member function
/// type or normal function type.
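/// Illustrative sketch (assumed target behavior): an explicit attribute such
/// as '__stdcall' on the chunk wins below; otherwise a non-static member
/// function may receive the target's default member calling convention (for
/// example __thiscall on 32-bit MSVC targets).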
static CallingConv getCCForDeclaratorChunk(
Sema &S, Declarator &D, const ParsedAttributesView &AttrList,
const DeclaratorChunk::FunctionTypeInfo &FTI, unsigned ChunkIndex) {
assert(D.getTypeObject(ChunkIndex).Kind == DeclaratorChunk::Function);
// Check for an explicit CC attribute.
for (const ParsedAttr &AL : AttrList) {
switch (AL.getKind()) {
CALLING_CONV_ATTRS_CASELIST : {
// Ignore attributes that don't validate or can't apply to the
// function type. We'll diagnose the failure to apply them in
// handleFunctionTypeAttr.
CallingConv CC;
if (!S.CheckCallingConvAttr(AL, CC) &&
(!FTI.isVariadic || supportsVariadicCall(CC))) {
return CC;
}
break;
}
default:
break;
}
}
bool IsCXXInstanceMethod = false;
if (S.getLangOpts().CPlusPlus) {
// Look inwards through parentheses to see if this chunk will form a
// member pointer type or if we're the declarator. Any type attributes
// between here and there will override the CC we choose here.
unsigned I = ChunkIndex;
bool FoundNonParen = false;
while (I && !FoundNonParen) {
--I;
if (D.getTypeObject(I).Kind != DeclaratorChunk::Paren)
FoundNonParen = true;
}
if (FoundNonParen) {
// If we're not the declarator, we're a regular function type unless we're
// in a member pointer.
IsCXXInstanceMethod =
D.getTypeObject(I).Kind == DeclaratorChunk::MemberPointer;
} else if (D.getContext() == DeclaratorContext::LambdaExpr) {
// This can only be a call operator for a lambda, which is an instance
// method.
IsCXXInstanceMethod = true;
} else {
// We're the innermost decl chunk, so must be a function declarator.
assert(D.isFunctionDeclarator());
// If we're inside a record, we're declaring a method, but it could be
// explicitly or implicitly static.
IsCXXInstanceMethod =
D.isFirstDeclarationOfMember() &&
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_typedef &&
!D.isStaticMember();
}
}
CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
IsCXXInstanceMethod);
// Attribute AT_OpenCLKernel affects the calling convention for SPIR
// and AMDGPU targets, hence it cannot be treated as a calling
// convention attribute. This is the simplest place to infer
// calling convention for OpenCL kernels.
if (S.getLangOpts().OpenCL) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_OpenCLKernel) {
CC = CC_OpenCLKernel;
break;
}
}
} else if (S.getLangOpts().CUDA) {
// If we're compiling CUDA/HIP code and targeting SPIR-V we need to make
// sure the kernels will be marked with the right calling convention so that
// they will be visible to the APIs that ingest SPIR-V.
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
if (Triple.getArch() == llvm::Triple::spirv32 ||
Triple.getArch() == llvm::Triple::spirv64) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_CUDAGlobal) {
CC = CC_OpenCLKernel;
break;
}
}
}
}
return CC;
}
namespace {
/// A simple notion of pointer kinds, which matches up with the various
/// pointer declarators.
enum class SimplePointerKind {
Pointer,
BlockPointer,
MemberPointer,
Array,
};
} // end anonymous namespace
IdentifierInfo *Sema::getNullabilityKeyword(NullabilityKind nullability) {
switch (nullability) {
case NullabilityKind::NonNull:
if (!Ident__Nonnull)
Ident__Nonnull = PP.getIdentifierInfo("_Nonnull");
return Ident__Nonnull;
case NullabilityKind::Nullable:
if (!Ident__Nullable)
Ident__Nullable = PP.getIdentifierInfo("_Nullable");
return Ident__Nullable;
case NullabilityKind::NullableResult:
if (!Ident__Nullable_result)
Ident__Nullable_result = PP.getIdentifierInfo("_Nullable_result");
return Ident__Nullable_result;
case NullabilityKind::Unspecified:
if (!Ident__Null_unspecified)
Ident__Null_unspecified = PP.getIdentifierInfo("_Null_unspecified");
return Ident__Null_unspecified;
}
llvm_unreachable("Unknown nullability kind.");
}
/// Retrieve the identifier "NSError".
IdentifierInfo *Sema::getNSErrorIdent() {
if (!Ident_NSError)
Ident_NSError = PP.getIdentifierInfo("NSError");
return Ident_NSError;
}
/// Check whether there is a nullability attribute of any kind in the given
/// attribute list.
static bool hasNullabilityAttr(const ParsedAttributesView &attrs) {
for (const ParsedAttr &AL : attrs) {
if (AL.getKind() == ParsedAttr::AT_TypeNonNull ||
AL.getKind() == ParsedAttr::AT_TypeNullable ||
AL.getKind() == ParsedAttr::AT_TypeNullableResult ||
AL.getKind() == ParsedAttr::AT_TypeNullUnspecified)
return true;
}
return false;
}
namespace {
/// Describes the kind of a pointer a declarator describes.
enum class PointerDeclaratorKind {
// Not a pointer.
NonPointer,
// Single-level pointer.
SingleLevelPointer,
// Multi-level pointer (of any pointer kind).
MultiLevelPointer,
// CFFooRef*
MaybePointerToCFRef,
// CFErrorRef*
CFErrorRefPointer,
// NSError**
NSErrorPointerPointer,
};
/// Describes a declarator chunk wrapping a pointer that marks inference as
/// unexpected.
// These values must be kept in sync with diagnostics.
enum class PointerWrappingDeclaratorKind {
/// Pointer is top-level.
None = -1,
/// Pointer is an array element.
Array = 0,
/// Pointer is the referent type of a C++ reference.
Reference = 1
};
} // end anonymous namespace
/// Classify the given declarator, whose type specifier is \c type, based on
/// what kind of pointer it refers to.
///
/// This is used to determine the default nullability.
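/// Illustrative sketch (hypothetical declarations): 'int *p' classifies as
/// SingleLevelPointer, 'NSError **err' as NSErrorPointerPointer, and
/// 'CFErrorRef *err' as CFErrorRefPointer; the latter two get special
/// nullability inference in function and method signatures.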
static PointerDeclaratorKind
classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator,
PointerWrappingDeclaratorKind &wrappingKind) {
unsigned numNormalPointers = 0;
// For any dependent type, we consider it a non-pointer.
if (type->isDependentType())
return PointerDeclaratorKind::NonPointer;
// Look through the declarator chunks to identify pointers.
for (unsigned i = 0, n = declarator.getNumTypeObjects(); i != n; ++i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Array:
if (numNormalPointers == 0)
wrappingKind = PointerWrappingDeclaratorKind::Array;
break;
case DeclaratorChunk::Function:
case DeclaratorChunk::Pipe:
break;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
: PointerDeclaratorKind::SingleLevelPointer;
case DeclaratorChunk::Paren:
break;
case DeclaratorChunk::Reference:
if (numNormalPointers == 0)
wrappingKind = PointerWrappingDeclaratorKind::Reference;
break;
case DeclaratorChunk::Pointer:
++numNormalPointers;
if (numNormalPointers > 2)
return PointerDeclaratorKind::MultiLevelPointer;
break;
}
}
// Then, dig into the type specifier itself.
unsigned numTypeSpecifierPointers = 0;
do {
// Decompose normal pointers.
if (auto ptrType = type->getAs<PointerType>()) {
++numNormalPointers;
if (numNormalPointers > 2)
return PointerDeclaratorKind::MultiLevelPointer;
type = ptrType->getPointeeType();
++numTypeSpecifierPointers;
continue;
}
// Decompose block pointers.
if (type->getAs<BlockPointerType>()) {
return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
: PointerDeclaratorKind::SingleLevelPointer;
}
// Decompose member pointers.
if (type->getAs<MemberPointerType>()) {
return numNormalPointers > 0 ? PointerDeclaratorKind::MultiLevelPointer
: PointerDeclaratorKind::SingleLevelPointer;
}
// Look at Objective-C object pointers.
if (auto objcObjectPtr = type->getAs<ObjCObjectPointerType>()) {
++numNormalPointers;
++numTypeSpecifierPointers;
// If this is NSError**, report that.
if (auto objcClassDecl = objcObjectPtr->getInterfaceDecl()) {
if (objcClassDecl->getIdentifier() == S.getNSErrorIdent() &&
numNormalPointers == 2 && numTypeSpecifierPointers < 2) {
return PointerDeclaratorKind::NSErrorPointerPointer;
}
}
break;
}
// Look at Objective-C class types.
if (auto objcClass = type->getAs<ObjCInterfaceType>()) {
if (objcClass->getInterface()->getIdentifier() == S.getNSErrorIdent()) {
if (numNormalPointers == 2 && numTypeSpecifierPointers < 2)
return PointerDeclaratorKind::NSErrorPointerPointer;
}
break;
}
// If at this point we haven't seen a pointer, we won't see one.
if (numNormalPointers == 0)
return PointerDeclaratorKind::NonPointer;
if (auto recordType = type->getAs<RecordType>()) {
RecordDecl *recordDecl = recordType->getDecl();
// If this is CFErrorRef*, report it as such.
if (numNormalPointers == 2 && numTypeSpecifierPointers < 2 &&
S.isCFError(recordDecl)) {
return PointerDeclaratorKind::CFErrorRefPointer;
}
break;
}
break;
} while (true);
switch (numNormalPointers) {
case 0:
return PointerDeclaratorKind::NonPointer;
case 1:
return PointerDeclaratorKind::SingleLevelPointer;
case 2:
return PointerDeclaratorKind::MaybePointerToCFRef;
default:
return PointerDeclaratorKind::MultiLevelPointer;
}
}
bool Sema::isCFError(RecordDecl *RD) {
// If we already know about CFError, test it directly.
if (CFError)
return CFError == RD;
// Check whether this is CFError, which we identify based on its bridge to
// NSError. CFErrorRef used to be declared with "objc_bridge" but is now
// declared with "objc_bridge_mutable", so look for either one of the two
// attributes.
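// Illustrative sketch (assumed SDK declaration shape), roughly:
//   typedef struct __attribute__((objc_bridge_mutable(NSError))) __CFError *CFErrorRef;
// which is the kind of bridge attribute this check looks for.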
if (RD->getTagKind() == TTK_Struct) {
IdentifierInfo *bridgedType = nullptr;
if (auto bridgeAttr = RD->getAttr<ObjCBridgeAttr>())
bridgedType = bridgeAttr->getBridgedType();
else if (auto bridgeAttr = RD->getAttr<ObjCBridgeMutableAttr>())
bridgedType = bridgeAttr->getBridgedType();
if (bridgedType == getNSErrorIdent()) {
CFError = RD;
return true;
}
}
return false;
}
static FileID getNullabilityCompletenessCheckFileID(Sema &S,
SourceLocation loc) {
// If we're anywhere in a function, method, or closure context, don't perform
// completeness checks.
for (DeclContext *ctx = S.CurContext; ctx; ctx = ctx->getParent()) {
if (ctx->isFunctionOrMethod())
return FileID();
if (ctx->isFileContext())
break;
}
// We only care about the expansion location.
loc = S.SourceMgr.getExpansionLoc(loc);
FileID file = S.SourceMgr.getFileID(loc);
if (file.isInvalid())
return FileID();
// Retrieve file information.
bool invalid = false;
const SrcMgr::SLocEntry &sloc = S.SourceMgr.getSLocEntry(file, &invalid);
if (invalid || !sloc.isFile())
return FileID();
// We don't want to perform completeness checks on the main file or in
// system headers.
const SrcMgr::FileInfo &fileInfo = sloc.getFile();
if (fileInfo.getIncludeLoc().isInvalid())
return FileID();
if (fileInfo.getFileCharacteristic() != SrcMgr::C_User &&
S.Diags.getSuppressSystemWarnings()) {
return FileID();
}
return file;
}
/// Creates a fix-it to insert a C-style nullability keyword at \p pointerLoc,
/// taking into account whitespace before and after.
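/// Illustrative sketch (hypothetical declaration): given 'int *ptr', a fix-it
/// produced here would insert text such as " _Nonnull " right after the '*',
/// dropping the surrounding spaces when the neighbouring characters already
/// provide separation.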
template <typename DiagBuilderT>
static void fixItNullability(Sema &S, DiagBuilderT &Diag,
SourceLocation PointerLoc,
NullabilityKind Nullability) {
assert(PointerLoc.isValid());
if (PointerLoc.isMacroID())
return;
SourceLocation FixItLoc = S.getLocForEndOfToken(PointerLoc);
if (!FixItLoc.isValid() || FixItLoc == PointerLoc)
return;
const char *NextChar = S.SourceMgr.getCharacterData(FixItLoc);
if (!NextChar)
return;
SmallString<32> InsertionTextBuf{" "};
InsertionTextBuf += getNullabilitySpelling(Nullability);
InsertionTextBuf += " ";
StringRef InsertionText = InsertionTextBuf.str();
if (isWhitespace(*NextChar)) {
InsertionText = InsertionText.drop_back();
} else if (NextChar[-1] == '[') {
if (NextChar[0] == ']')
InsertionText = InsertionText.drop_back().drop_front();
else
InsertionText = InsertionText.drop_front();
} else if (!isAsciiIdentifierContinue(NextChar[0], /*allow dollar*/ true) &&
!isAsciiIdentifierContinue(NextChar[-1], /*allow dollar*/ true)) {
InsertionText = InsertionText.drop_back().drop_front();
}
Diag << FixItHint::CreateInsertion(FixItLoc, InsertionText);
}
static void emitNullabilityConsistencyWarning(Sema &S,
SimplePointerKind PointerKind,
SourceLocation PointerLoc,
SourceLocation PointerEndLoc) {
assert(PointerLoc.isValid());
if (PointerKind == SimplePointerKind::Array) {
S.Diag(PointerLoc, diag::warn_nullability_missing_array);
} else {
S.Diag(PointerLoc, diag::warn_nullability_missing)
<< static_cast<unsigned>(PointerKind);
}
auto FixItLoc = PointerEndLoc.isValid() ? PointerEndLoc : PointerLoc;
if (FixItLoc.isMacroID())
return;
auto addFixIt = [&](NullabilityKind Nullability) {
auto Diag = S.Diag(FixItLoc, diag::note_nullability_fix_it);
Diag << static_cast<unsigned>(Nullability);
Diag << static_cast<unsigned>(PointerKind);
fixItNullability(S, Diag, FixItLoc, Nullability);
};
addFixIt(NullabilityKind::Nullable);
addFixIt(NullabilityKind::NonNull);
}
/// Complains about missing nullability if the file containing \p pointerLoc
/// has other uses of nullability (either the keywords or the \c assume_nonnull
/// pragma).
///
/// If the file has \e not seen other uses of nullability, this particular
/// pointer is saved for possible later diagnosis. See recordNullabilitySeen().
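/// Illustrative sketch (hypothetical header): once a header has used _Nonnull,
/// _Nullable, or '#pragma clang assume_nonnull' anywhere, a later bare pointer
/// such as 'int *p' in a declaration triggers the missing-nullability warning,
/// with fix-it notes suggesting both _Nullable and _Nonnull.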
static void
checkNullabilityConsistency(Sema &S, SimplePointerKind pointerKind,
SourceLocation pointerLoc,
SourceLocation pointerEndLoc = SourceLocation()) {
// Determine which file we're performing consistency checking for.
FileID file = getNullabilityCompletenessCheckFileID(S, pointerLoc);
if (file.isInvalid())
return;
// If we haven't seen any type nullability in this file, we won't warn now
// about anything.
FileNullability &fileNullability = S.NullabilityMap[file];
if (!fileNullability.SawTypeNullability) {
// If this is the first pointer declarator in the file, and the appropriate
// warning is on, record it in case we need to diagnose it retroactively.
diag::kind diagKind;
if (pointerKind == SimplePointerKind::Array)
diagKind = diag::warn_nullability_missing_array;
else
diagKind = diag::warn_nullability_missing;
if (fileNullability.PointerLoc.isInvalid() &&
!S.Context.getDiagnostics().isIgnored(diagKind, pointerLoc)) {
fileNullability.PointerLoc = pointerLoc;
fileNullability.PointerEndLoc = pointerEndLoc;
fileNullability.PointerKind = static_cast<unsigned>(pointerKind);
}
return;
}
// Complain about missing nullability.
emitNullabilityConsistencyWarning(S, pointerKind, pointerLoc, pointerEndLoc);
}
/// Marks that a nullability feature has been used in the file containing
/// \p loc.
///
/// If this file already had pointer types in it that were missing nullability,
/// the first such instance is retroactively diagnosed.
///
/// \sa checkNullabilityConsistency
static void recordNullabilitySeen(Sema &S, SourceLocation loc) {
FileID file = getNullabilityCompletenessCheckFileID(S, loc);
if (file.isInvalid())
return;
FileNullability &fileNullability = S.NullabilityMap[file];
if (fileNullability.SawTypeNullability)
return;
fileNullability.SawTypeNullability = true;
// If we haven't seen any type nullability before, now we have. Retroactively
// diagnose the first unannotated pointer, if there was one.
if (fileNullability.PointerLoc.isInvalid())
return;
auto kind = static_cast<SimplePointerKind>(fileNullability.PointerKind);
emitNullabilityConsistencyWarning(S, kind, fileNullability.PointerLoc,
fileNullability.PointerEndLoc);
}
/// Returns true if any of the declarator chunks before \p endIndex include a
/// level of indirection: array, pointer, reference, or pointer-to-member.
///
/// Because declarator chunks are stored in outer-to-inner order, testing
/// every chunk before \p endIndex is testing all chunks that embed the current
/// chunk as part of their type.
///
/// It is legal to pass the result of Declarator::getNumTypeObjects() as the
/// end index, in which case all chunks are tested.
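/// Illustrative sketch (hypothetical declarator): for 'int (*p)[10]' the array
/// chunk is wrapped by a pointer chunk, so the query returns true for the
/// array; for a plain parameter 'int p[10]' it returns false.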
static bool hasOuterPointerLikeChunk(const Declarator &D, unsigned endIndex) {
unsigned i = endIndex;
while (i != 0) {
// Walk outwards along the declarator chunks.
--i;
const DeclaratorChunk &DC = D.getTypeObject(i);
switch (DC.Kind) {
case DeclaratorChunk::Paren:
break;
case DeclaratorChunk::Array:
case DeclaratorChunk::Pointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
return true;
case DeclaratorChunk::Function:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Pipe:
// These are invalid anyway, so just ignore.
break;
}
}
return false;
}
static bool IsNoDerefableChunk(DeclaratorChunk Chunk) {
return (Chunk.Kind == DeclaratorChunk::Pointer ||
Chunk.Kind == DeclaratorChunk::Array);
}
template<typename AttrT>
static AttrT *createSimpleAttr(ASTContext &Ctx, ParsedAttr &AL) {
AL.setUsedAsTypeAttr();
return ::new (Ctx) AttrT(Ctx, AL);
}
static Attr *createNullabilityAttr(ASTContext &Ctx, ParsedAttr &Attr,
NullabilityKind NK) {
switch (NK) {
case NullabilityKind::NonNull:
return createSimpleAttr<TypeNonNullAttr>(Ctx, Attr);
case NullabilityKind::Nullable:
return createSimpleAttr<TypeNullableAttr>(Ctx, Attr);
case NullabilityKind::NullableResult:
return createSimpleAttr<TypeNullableResultAttr>(Ctx, Attr);
case NullabilityKind::Unspecified:
return createSimpleAttr<TypeNullUnspecifiedAttr>(Ctx, Attr);
}
llvm_unreachable("unknown NullabilityKind");
}
// Diagnose the case where a type is qualified with multiple address spaces.
// Returns true if this is an invalid case.
// ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "No type shall be qualified
// by qualifiers for two or more different address spaces."
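// Illustrative sketch (hypothetical declaration): qualifying a type with two
// different address spaces, e.g.
//   __attribute__((address_space(1))) __attribute__((address_space(2))) int x;
// is rejected here, while repeating the same address space only warns.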
static bool DiagnoseMultipleAddrSpaceAttributes(Sema &S, LangAS ASOld,
LangAS ASNew,
SourceLocation AttrLoc) {
if (ASOld != LangAS::Default) {
if (ASOld != ASNew) {
S.Diag(AttrLoc, diag::err_attribute_address_multiple_qualifiers);
return true;
}
// Emit a warning if they are identical; it's likely unintended.
S.Diag(AttrLoc,
diag::warn_attribute_address_multiple_identical_qualifiers);
}
return false;
}
static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
QualType declSpecType,
TypeSourceInfo *TInfo) {
// The TypeSourceInfo that this function returns will not be a null type.
// If there is an error, this function will fill in a dummy type as fallback.
QualType T = declSpecType;
Declarator &D = state.getDeclarator();
Sema &S = state.getSema();
ASTContext &Context = S.Context;
const LangOptions &LangOpts = S.getLangOpts();
// The name we're declaring, if any.
DeclarationName Name;
if (D.getIdentifier())
Name = D.getIdentifier();
// Does this declaration declare a typedef-name?
bool IsTypedefName =
D.getDeclSpec().getStorageClassSpec() == DeclSpec::SCS_typedef ||
D.getContext() == DeclaratorContext::AliasDecl ||
D.getContext() == DeclaratorContext::AliasTemplate;
// Does T refer to a function type with a cv-qualifier or a ref-qualifier?
bool IsQualifiedFunction = T->isFunctionProtoType() &&
(!T->castAs<FunctionProtoType>()->getMethodQuals().empty() ||
T->castAs<FunctionProtoType>()->getRefQualifier() != RQ_None);
// If T is 'decltype(auto)', the only declarators we can have are parens
// and at most one function declarator if this is a function declaration.
// If T is a deduced class template specialization type, we can have no
// declarator chunks at all.
if (auto *DT = T->getAs<DeducedType>()) {
const AutoType *AT = T->getAs<AutoType>();
bool IsClassTemplateDeduction = isa<DeducedTemplateSpecializationType>(DT);
if ((AT && AT->isDecltypeAuto()) || IsClassTemplateDeduction) {
for (unsigned I = 0, E = D.getNumTypeObjects(); I != E; ++I) {
unsigned Index = E - I - 1;
DeclaratorChunk &DeclChunk = D.getTypeObject(Index);
unsigned DiagId = IsClassTemplateDeduction
? diag::err_deduced_class_template_compound_type
: diag::err_decltype_auto_compound_type;
unsigned DiagKind = 0;
switch (DeclChunk.Kind) {
case DeclaratorChunk::Paren:
// FIXME: Rejecting this is a little silly.
if (IsClassTemplateDeduction) {
DiagKind = 4;
break;
}
continue;
case DeclaratorChunk::Function: {
if (IsClassTemplateDeduction) {
DiagKind = 3;
break;
}
unsigned FnIndex;
if (D.isFunctionDeclarationContext() &&
D.isFunctionDeclarator(FnIndex) && FnIndex == Index)
continue;
DiagId = diag::err_decltype_auto_function_declarator_not_declaration;
break;
}
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
DiagKind = 0;
break;
case DeclaratorChunk::Reference:
DiagKind = 1;
break;
case DeclaratorChunk::Array:
DiagKind = 2;
break;
case DeclaratorChunk::Pipe:
break;
}
S.Diag(DeclChunk.Loc, DiagId) << DiagKind;
D.setInvalidType(true);
break;
}
}
}
// Determine whether we should infer _Nonnull on pointer types.
Optional<NullabilityKind> inferNullability;
bool inferNullabilityCS = false;
bool inferNullabilityInnerOnly = false;
bool inferNullabilityInnerOnlyComplete = false;
// Are we in an assume-nonnull region?
bool inAssumeNonNullRegion = false;
SourceLocation assumeNonNullLoc = S.PP.getPragmaAssumeNonNullLoc();
if (assumeNonNullLoc.isValid()) {
inAssumeNonNullRegion = true;
recordNullabilitySeen(S, assumeNonNullLoc);
}
// Whether to complain about missing nullability specifiers or not.
enum {
/// Never complain.
CAMN_No,
/// Complain on the inner pointers (but not the outermost
/// pointer).
CAMN_InnerPointers,
/// Complain about any pointers that don't have nullability
/// specified or inferred.
CAMN_Yes
} complainAboutMissingNullability = CAMN_No;
unsigned NumPointersRemaining = 0;
auto complainAboutInferringWithinChunk = PointerWrappingDeclaratorKind::None;
if (IsTypedefName) {
// For typedefs, we do not infer any nullability (the default),
// and we only complain about missing nullability specifiers on
// inner pointers.
complainAboutMissingNullability = CAMN_InnerPointers;
if (T->canHaveNullability(/*ResultIfUnknown*/false) &&
!T->getNullability(S.Context)) {
// Note that we allow but don't require nullability on dependent types.
++NumPointersRemaining;
}
for (unsigned i = 0, n = D.getNumTypeObjects(); i != n; ++i) {
DeclaratorChunk &chunk = D.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Array:
case DeclaratorChunk::Function:
case DeclaratorChunk::Pipe:
break;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
++NumPointersRemaining;
break;
case DeclaratorChunk::Paren:
case DeclaratorChunk::Reference:
continue;
case DeclaratorChunk::Pointer:
++NumPointersRemaining;
continue;
}
}
} else {
bool isFunctionOrMethod = false;
switch (auto context = state.getDeclarator().getContext()) {
case DeclaratorContext::ObjCParameter:
case DeclaratorContext::ObjCResult:
case DeclaratorContext::Prototype:
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
isFunctionOrMethod = true;
LLVM_FALLTHROUGH;
case DeclaratorContext::Member:
if (state.getDeclarator().isObjCIvar() && !isFunctionOrMethod) {
complainAboutMissingNullability = CAMN_No;
break;
}
// Weak properties are inferred to be nullable.
if (state.getDeclarator().isObjCWeakProperty() && inAssumeNonNullRegion) {
inferNullability = NullabilityKind::Nullable;
break;
}
LLVM_FALLTHROUGH;
case DeclaratorContext::File:
case DeclaratorContext::KNRTypeList: {
complainAboutMissingNullability = CAMN_Yes;
// Nullability inference depends on the type and declarator.
auto wrappingKind = PointerWrappingDeclaratorKind::None;
switch (classifyPointerDeclarator(S, T, D, wrappingKind)) {
case PointerDeclaratorKind::NonPointer:
case PointerDeclaratorKind::MultiLevelPointer:
// Cannot infer nullability.
break;
case PointerDeclaratorKind::SingleLevelPointer:
// Infer _Nonnull if we are in an assumes-nonnull region.
if (inAssumeNonNullRegion) {
complainAboutInferringWithinChunk = wrappingKind;
inferNullability = NullabilityKind::NonNull;
inferNullabilityCS = (context == DeclaratorContext::ObjCParameter ||
context == DeclaratorContext::ObjCResult);
}
break;
case PointerDeclaratorKind::CFErrorRefPointer:
case PointerDeclaratorKind::NSErrorPointerPointer:
// Within a function or method signature, infer _Nullable at both
// levels.
if (isFunctionOrMethod && inAssumeNonNullRegion)
inferNullability = NullabilityKind::Nullable;
break;
case PointerDeclaratorKind::MaybePointerToCFRef:
if (isFunctionOrMethod) {
// On pointer-to-pointer parameters marked cf_returns_retained or
// cf_returns_not_retained, if the outer pointer is explicit then
// infer the inner pointer as _Nullable.
auto hasCFReturnsAttr =
[](const ParsedAttributesView &AttrList) -> bool {
return AttrList.hasAttribute(ParsedAttr::AT_CFReturnsRetained) ||
AttrList.hasAttribute(ParsedAttr::AT_CFReturnsNotRetained);
};
if (const auto *InnermostChunk = D.getInnermostNonParenChunk()) {
if (hasCFReturnsAttr(D.getDeclarationAttributes()) ||
hasCFReturnsAttr(D.getAttributes()) ||
hasCFReturnsAttr(InnermostChunk->getAttrs()) ||
hasCFReturnsAttr(D.getDeclSpec().getAttributes())) {
inferNullability = NullabilityKind::Nullable;
inferNullabilityInnerOnly = true;
}
}
}
break;
}
break;
}
case DeclaratorContext::ConversionId:
complainAboutMissingNullability = CAMN_Yes;
break;
case DeclaratorContext::AliasDecl:
case DeclaratorContext::AliasTemplate:
case DeclaratorContext::Block:
case DeclaratorContext::BlockLiteral:
case DeclaratorContext::Condition:
case DeclaratorContext::CXXCatch:
case DeclaratorContext::CXXNew:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::LambdaExpr:
case DeclaratorContext::LambdaExprParameter:
case DeclaratorContext::ObjCCatch:
case DeclaratorContext::TemplateParam:
case DeclaratorContext::TemplateArg:
case DeclaratorContext::TemplateTypeArg:
case DeclaratorContext::TypeName:
case DeclaratorContext::FunctionalCast:
case DeclaratorContext::RequiresExpr:
case DeclaratorContext::Association:
// Don't infer in these contexts.
break;
}
}
// Local function that returns true if its argument looks like a va_list.
auto isVaList = [&S](QualType T) -> bool {
auto *typedefTy = T->getAs<TypedefType>();
if (!typedefTy)
return false;
TypedefDecl *vaListTypedef = S.Context.getBuiltinVaListDecl();
do {
if (typedefTy->getDecl() == vaListTypedef)
return true;
if (auto *name = typedefTy->getDecl()->getIdentifier())
if (name->isStr("va_list"))
return true;
typedefTy = typedefTy->desugar()->getAs<TypedefType>();
} while (typedefTy);
return false;
};
// Local function that checks the nullability for a given pointer declarator.
// Returns true if _Nonnull was inferred.
auto inferPointerNullability =
[&](SimplePointerKind pointerKind, SourceLocation pointerLoc,
SourceLocation pointerEndLoc,
ParsedAttributesView &attrs, AttributePool &Pool) -> ParsedAttr * {
// We've seen a pointer.
if (NumPointersRemaining > 0)
--NumPointersRemaining;
// If a nullability attribute is present, there's nothing to do.
if (hasNullabilityAttr(attrs))
return nullptr;
// If we're supposed to infer nullability, do so now.
if (inferNullability && !inferNullabilityInnerOnlyComplete) {
ParsedAttr::Syntax syntax = inferNullabilityCS
? ParsedAttr::AS_ContextSensitiveKeyword
: ParsedAttr::AS_Keyword;
ParsedAttr *nullabilityAttr = Pool.create(
S.getNullabilityKeyword(*inferNullability), SourceRange(pointerLoc),
nullptr, SourceLocation(), nullptr, 0, syntax);
attrs.addAtEnd(nullabilityAttr);
if (inferNullabilityCS) {
state.getDeclarator().getMutableDeclSpec().getObjCQualifiers()
->setObjCDeclQualifier(ObjCDeclSpec::DQ_CSNullability);
}
if (pointerLoc.isValid() &&
complainAboutInferringWithinChunk !=
PointerWrappingDeclaratorKind::None) {
auto Diag =
S.Diag(pointerLoc, diag::warn_nullability_inferred_on_nested_type);
Diag << static_cast<int>(complainAboutInferringWithinChunk);
fixItNullability(S, Diag, pointerLoc, NullabilityKind::NonNull);
}
if (inferNullabilityInnerOnly)
inferNullabilityInnerOnlyComplete = true;
return nullabilityAttr;
}
// If we're supposed to complain about missing nullability, do so
// now if it's truly missing.
switch (complainAboutMissingNullability) {
case CAMN_No:
break;
case CAMN_InnerPointers:
if (NumPointersRemaining == 0)
break;
LLVM_FALLTHROUGH;
case CAMN_Yes:
checkNullabilityConsistency(S, pointerKind, pointerLoc, pointerEndLoc);
}
return nullptr;
};
// If the type itself could have nullability but does not, infer pointer
// nullability and perform consistency checking.
if (S.CodeSynthesisContexts.empty()) {
if (T->canHaveNullability(/*ResultIfUnknown*/false) &&
!T->getNullability(S.Context)) {
if (isVaList(T)) {
// Record that we've seen a pointer, but do nothing else.
if (NumPointersRemaining > 0)
--NumPointersRemaining;
} else {
SimplePointerKind pointerKind = SimplePointerKind::Pointer;
if (T->isBlockPointerType())
pointerKind = SimplePointerKind::BlockPointer;
else if (T->isMemberPointerType())
pointerKind = SimplePointerKind::MemberPointer;
if (auto *attr = inferPointerNullability(
pointerKind, D.getDeclSpec().getTypeSpecTypeLoc(),
D.getDeclSpec().getEndLoc(),
D.getMutableDeclSpec().getAttributes(),
D.getMutableDeclSpec().getAttributePool())) {
T = state.getAttributedType(
createNullabilityAttr(Context, *attr, *inferNullability), T, T);
}
}
}
if (complainAboutMissingNullability == CAMN_Yes &&
T->isArrayType() && !T->getNullability(S.Context) && !isVaList(T) &&
D.isPrototypeContext() &&
!hasOuterPointerLikeChunk(D, D.getNumTypeObjects())) {
checkNullabilityConsistency(S, SimplePointerKind::Array,
D.getDeclSpec().getTypeSpecTypeLoc());
}
}
bool ExpectNoDerefChunk =
state.getCurrentAttributes().hasAttribute(ParsedAttr::AT_NoDeref);
// Walk the DeclTypeInfo, building the recursive type as we go.
// DeclTypeInfos are ordered from the identifier out, which is
// opposite of what we want :).
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
unsigned chunkIndex = e - i - 1;
state.setCurrentChunkIndex(chunkIndex);
DeclaratorChunk &DeclType = D.getTypeObject(chunkIndex);
IsQualifiedFunction &= DeclType.Kind == DeclaratorChunk::Paren;
switch (DeclType.Kind) {
case DeclaratorChunk::Paren:
if (i == 0)
warnAboutRedundantParens(S, D, T);
T = S.BuildParenType(T);
break;
case DeclaratorChunk::BlockPointer:
// If blocks are disabled, emit an error.
if (!LangOpts.Blocks)
S.Diag(DeclType.Loc, diag::err_blocks_disable) << LangOpts.OpenCL;
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::BlockPointer, DeclType.Loc,
DeclType.EndLoc, DeclType.getAttrs(),
state.getDeclarator().getAttributePool());
T = S.BuildBlockPointerType(T, D.getIdentifierLoc(), Name);
if (DeclType.Cls.TypeQuals || LangOpts.OpenCL) {
// OpenCL v2.0, s6.12.5 - Block variable declarations are implicitly
// qualified with const.
if (LangOpts.OpenCL)
DeclType.Cls.TypeQuals |= DeclSpec::TQ_const;
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Cls.TypeQuals);
}
break;
case DeclaratorChunk::Pointer:
// Verify that we're not building a pointer to pointer to function with
// exception specification.
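// For illustration: in C++14, 'void (**pp)() throw(int);' is ill-formed
// because the exception specification sits beyond one level of indirection.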
if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) {
S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec);
D.setInvalidType(true);
// Build the type anyway.
}
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::Pointer, DeclType.Loc,
DeclType.EndLoc, DeclType.getAttrs(),
state.getDeclarator().getAttributePool());
if (LangOpts.ObjC && T->getAs<ObjCObjectType>()) {
T = Context.getObjCObjectPointerType(T);
if (DeclType.Ptr.TypeQuals)
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals);
break;
}
// OpenCL v2.0 s6.9b - Pointer to image/sampler cannot be used.
// OpenCL v2.0 s6.13.16.1 - Pointer to pipe cannot be used.
// OpenCL v2.0 s6.12.5 - Pointers to Blocks are not allowed.
if (LangOpts.OpenCL) {
if (T->isImageType() || T->isSamplerT() || T->isPipeType() ||
T->isBlockPointerType()) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_pointer_to_type) << T;
D.setInvalidType(true);
}
}
T = S.BuildPointerType(T, DeclType.Loc, Name);
if (DeclType.Ptr.TypeQuals)
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Ptr.TypeQuals);
break;
case DeclaratorChunk::Reference: {
// Verify that we're not building a reference to pointer to function with
// exception specification.
if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) {
S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec);
D.setInvalidType(true);
// Build the type anyway.
}
T = S.BuildReferenceType(T, DeclType.Ref.LValueRef, DeclType.Loc, Name);
if (DeclType.Ref.HasRestrict)
T = S.BuildQualifiedType(T, DeclType.Loc, Qualifiers::Restrict);
break;
}
case DeclaratorChunk::Array: {
// Verify that we're not building an array of pointers to function with
// exception specification.
if (LangOpts.CPlusPlus && S.CheckDistantExceptionSpec(T)) {
S.Diag(D.getIdentifierLoc(), diag::err_distant_exception_spec);
D.setInvalidType(true);
// Build the type anyway.
}
DeclaratorChunk::ArrayTypeInfo &ATI = DeclType.Arr;
Expr *ArraySize = static_cast<Expr*>(ATI.NumElts);
ArrayType::ArraySizeModifier ASM;
if (ATI.isStar)
ASM = ArrayType::Star;
else if (ATI.hasStatic)
ASM = ArrayType::Static;
else
ASM = ArrayType::Normal;
if (ASM == ArrayType::Star && !D.isPrototypeContext()) {
// FIXME: This check isn't quite right: it allows star in prototypes
// for function definitions, and disallows some edge cases detailed
// in http://gcc.gnu.org/ml/gcc-patches/2009-02/msg00133.html
S.Diag(DeclType.Loc, diag::err_array_star_outside_prototype);
ASM = ArrayType::Normal;
D.setInvalidType(true);
}
// C99 6.7.5.2p1: The optional type qualifiers and the keyword static
// shall appear only in a declaration of a function parameter with an
// array type, ...
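// e.g. 'void f(int a[static 10]);' is valid, while a declaration such as
// 'int b[static 10];' outside a function prototype is diagnosed below.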
if (ASM == ArrayType::Static || ATI.TypeQuals) {
if (!(D.isPrototypeContext() ||
D.getContext() == DeclaratorContext::KNRTypeList)) {
S.Diag(DeclType.Loc, diag::err_array_static_outside_prototype) <<
(ASM == ArrayType::Static ? "'static'" : "type qualifier");
// Remove the 'static' and the type qualifiers.
if (ASM == ArrayType::Static)
ASM = ArrayType::Normal;
ATI.TypeQuals = 0;
D.setInvalidType(true);
}
// C99 6.7.5.2p1: ... and then only in the outermost array type
// derivation.
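// e.g. 'void f(int a[static 10][20]);' is fine, but
// 'void g(int a[10][static 20]);' applies 'static' to a non-outermost
// array derivation and is diagnosed below.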
if (hasOuterPointerLikeChunk(D, chunkIndex)) {
S.Diag(DeclType.Loc, diag::err_array_static_not_outermost) <<
(ASM == ArrayType::Static ? "'static'" : "type qualifier");
if (ASM == ArrayType::Static)
ASM = ArrayType::Normal;
ATI.TypeQuals = 0;
D.setInvalidType(true);
}
}
const AutoType *AT = T->getContainedAutoType();
// Allow arrays of auto if we are a generic lambda parameter.
// i.e. [](auto (&array)[5]) { return array[0]; }; OK
if (AT && D.getContext() != DeclaratorContext::LambdaExprParameter) {
// We've already diagnosed this for decltype(auto).
if (!AT->isDecltypeAuto())
S.Diag(DeclType.Loc, diag::err_illegal_decl_array_of_auto)
<< getPrintableNameForEntity(Name) << T;
T = QualType();
break;
}
// Array parameters can be marked nullable as well, although it's not
// necessary if they're marked 'static'.
if (complainAboutMissingNullability == CAMN_Yes &&
!hasNullabilityAttr(DeclType.getAttrs()) &&
ASM != ArrayType::Static &&
D.isPrototypeContext() &&
!hasOuterPointerLikeChunk(D, chunkIndex)) {
checkNullabilityConsistency(S, SimplePointerKind::Array, DeclType.Loc);
}
T = S.BuildArrayType(T, ASM, ArraySize, ATI.TypeQuals,
SourceRange(DeclType.Loc, DeclType.EndLoc), Name);
break;
}
case DeclaratorChunk::Function: {
// If the function declarator has a prototype (i.e. it is not () and
// does not have a K&R-style identifier list), then the arguments are part
// of the type, otherwise the argument list is ().
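// e.g. 'int f(int);' carries a prototype; in C, 'int f();' leaves the
// parameters unspecified, and 'int f(a) int a; { ... }' is a K&R-style
// definition with an identifier list.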
DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
IsQualifiedFunction =
FTI.hasMethodTypeQualifiers() || FTI.hasRefQualifier();
// Check for auto functions and trailing return type and adjust the
// return type accordingly.
if (!D.isInvalidType()) {
// trailing-return-type is only required if we're declaring a function,
// and not, for instance, a pointer to a function.
if (D.getDeclSpec().hasAutoTypeSpec() &&
!FTI.hasTrailingReturnType() && chunkIndex == 0) {
if (!S.getLangOpts().CPlusPlus14) {
S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
D.getDeclSpec().getTypeSpecType() == DeclSpec::TST_auto
? diag::err_auto_missing_trailing_return
: diag::err_deduced_return_type);
T = Context.IntTy;
D.setInvalidType(true);
} else {
S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
diag::warn_cxx11_compat_deduced_return_type);
}
} else if (FTI.hasTrailingReturnType()) {
// T must be exactly 'auto' at this point. See CWG issue 681.
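// e.g. 'auto f() -> int;' is valid, while 'int f() -> int;' and
// 'auto *f() -> int;' are rejected below.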
if (isa<ParenType>(T)) {
S.Diag(D.getBeginLoc(), diag::err_trailing_return_in_parens)
<< T << D.getSourceRange();
D.setInvalidType(true);
} else if (D.getName().getKind() ==
UnqualifiedIdKind::IK_DeductionGuideName) {
if (T != Context.DependentTy) {
S.Diag(D.getDeclSpec().getBeginLoc(),
diag::err_deduction_guide_with_complex_decl)
<< D.getSourceRange();
D.setInvalidType(true);
}
} else if (D.getContext() != DeclaratorContext::LambdaExpr &&
(T.hasQualifiers() || !isa<AutoType>(T) ||
cast<AutoType>(T)->getKeyword() !=
AutoTypeKeyword::Auto ||
cast<AutoType>(T)->isConstrained())) {
S.Diag(D.getDeclSpec().getTypeSpecTypeLoc(),
diag::err_trailing_return_without_auto)
<< T << D.getDeclSpec().getSourceRange();
D.setInvalidType(true);
}
T = S.GetTypeFromParser(FTI.getTrailingReturnType(), &TInfo);
if (T.isNull()) {
// An error occurred parsing the trailing return type.
T = Context.IntTy;
D.setInvalidType(true);
} else if (AutoType *Auto = T->getContainedAutoType()) {
// If the trailing return type contains an `auto`, we may need to
// invent a template parameter for it, for cases like
// `auto f() -> C auto` or `[](auto (*p) -> auto) {}`.
InventedTemplateParameterInfo *InventedParamInfo = nullptr;
if (D.getContext() == DeclaratorContext::Prototype)
InventedParamInfo = &S.InventedParameterInfos.back();
else if (D.getContext() == DeclaratorContext::LambdaExprParameter)
InventedParamInfo = S.getCurLambda();
if (InventedParamInfo) {
std::tie(T, TInfo) = InventTemplateParameter(
state, T, TInfo, Auto, *InventedParamInfo);
}
}
} else {
// This function type is not the type of the entity being declared,
// so checking the 'auto' is not the responsibility of this chunk.
}
}
// C99 6.7.5.3p1: The return type may not be a function or array type.
// For conversion functions, we'll diagnose this particular error later.
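// e.g. 'int f()[10];' (returning an array) and 'int g()();' (returning a
// function) are both diagnosed here.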
if (!D.isInvalidType() && (T->isArrayType() || T->isFunctionType()) &&
(D.getName().getKind() !=
UnqualifiedIdKind::IK_ConversionFunctionId)) {
unsigned diagID = diag::err_func_returning_array_function;
// Processing the last chunk (chunkIndex 0) in a block-literal context
// means this function chunk represents the block itself.
if (chunkIndex == 0 &&
D.getContext() == DeclaratorContext::BlockLiteral)
diagID = diag::err_block_returning_array_function;
S.Diag(DeclType.Loc, diagID) << T->isFunctionType() << T;
T = Context.IntTy;
D.setInvalidType(true);
}
// Do not allow returning half FP value.
// FIXME: This really should be in BuildFunctionType.
if (T->isHalfType()) {
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16",
S.getLangOpts())) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
<< T << 0 /*pointer hint*/;
D.setInvalidType(true);
}
} else if (!S.getLangOpts().HalfArgsAndReturns) {
S.Diag(D.getIdentifierLoc(),
diag::err_parameters_retval_cannot_have_fp16_type) << 1;
D.setInvalidType(true);
}
}
if (LangOpts.OpenCL) {
// OpenCL v2.0 s6.12.5 - A block cannot be the return value of a
// function.
if (T->isBlockPointerType() || T->isImageType() || T->isSamplerT() ||
T->isPipeType()) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_invalid_return)
<< T << 1 /*hint off*/;
D.setInvalidType(true);
}
// OpenCL doesn't support variadic functions and blocks
// (s6.9.e and s6.12.5 OpenCL v2.0) except for printf.
// We also allow any toolchain-reserved identifiers here.
if (FTI.isVariadic &&
!S.getOpenCLOptions().isAvailableOption(
"__cl_clang_variadic_functions", S.getLangOpts()) &&
!(D.getIdentifier() &&
((D.getIdentifier()->getName() == "printf" &&
LangOpts.getOpenCLCompatibleVersion() >= 120) ||
D.getIdentifier()->getName().startswith("__")))) {
S.Diag(D.getIdentifierLoc(), diag::err_opencl_variadic_function);
D.setInvalidType(true);
}
}
// Methods cannot return interface types. All ObjC objects are
// passed by reference.
if (T->isObjCObjectType()) {
SourceLocation DiagLoc, FixitLoc;
if (TInfo) {
DiagLoc = TInfo->getTypeLoc().getBeginLoc();
FixitLoc = S.getLocForEndOfToken(TInfo->getTypeLoc().getEndLoc());
} else {
DiagLoc = D.getDeclSpec().getTypeSpecTypeLoc();
FixitLoc = S.getLocForEndOfToken(D.getDeclSpec().getEndLoc());
}
S.Diag(DiagLoc, diag::err_object_cannot_be_passed_returned_by_value)
<< 0 << T
<< FixItHint::CreateInsertion(FixitLoc, "*");
T = Context.getObjCObjectPointerType(T);
if (TInfo) {
TypeLocBuilder TLB;
TLB.pushFullCopy(TInfo->getTypeLoc());
ObjCObjectPointerTypeLoc TLoc = TLB.push<ObjCObjectPointerTypeLoc>(T);
TLoc.setStarLoc(FixitLoc);
TInfo = TLB.getTypeSourceInfo(Context, T);
}
D.setInvalidType(true);
}
// cv-qualifiers on return types are pointless except when the type is a
// class type in C++.
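// e.g. the 'const' in 'const int f();' is meaningless for a non-class
// return type and is diagnosed as redundant below.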
if ((T.getCVRQualifiers() || T->isAtomicType()) &&
!(S.getLangOpts().CPlusPlus &&
(T->isDependentType() || T->isRecordType()))) {
if (T->isVoidType() && !S.getLangOpts().CPlusPlus &&
D.getFunctionDefinitionKind() ==
FunctionDefinitionKind::Definition) {
// [6.9.1/3] qualified void return is invalid on a C
// function definition. Apparently ok on declarations and
// in C++ though (!)
S.Diag(DeclType.Loc, diag::err_func_returning_qualified_void) << T;
} else
diagnoseRedundantReturnTypeQualifiers(S, T, D, chunkIndex);
// C++2a [dcl.fct]p12:
// A volatile-qualified return type is deprecated
if (T.isVolatileQualified() && S.getLangOpts().CPlusPlus20)
S.Diag(DeclType.Loc, diag::warn_deprecated_volatile_return) << T;
}
// Objective-C ARC ownership qualifiers are ignored on the function
// return type (by type canonicalization). Complain if this attribute
// was written here.
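// e.g. '__strong id f();' warns that the __strong qualifier on the return
// type is ignored.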
if (T.getQualifiers().hasObjCLifetime()) {
SourceLocation AttrLoc;
if (chunkIndex + 1 < D.getNumTypeObjects()) {
DeclaratorChunk ReturnTypeChunk = D.getTypeObject(chunkIndex + 1);
for (const ParsedAttr &AL : ReturnTypeChunk.getAttrs()) {
if (AL.getKind() == ParsedAttr::AT_ObjCOwnership) {
AttrLoc = AL.getLoc();
break;
}
}
}
if (AttrLoc.isInvalid()) {
for (const ParsedAttr &AL : D.getDeclSpec().getAttributes()) {
if (AL.getKind() == ParsedAttr::AT_ObjCOwnership) {
AttrLoc = AL.getLoc();
break;
}
}
}
if (AttrLoc.isValid()) {
// The ownership attributes are almost always written via
// the predefined
// __strong/__weak/__autoreleasing/__unsafe_unretained.
if (AttrLoc.isMacroID())
AttrLoc =
S.SourceMgr.getImmediateExpansionRange(AttrLoc).getBegin();
S.Diag(AttrLoc, diag::warn_arc_lifetime_result_type)
<< T.getQualifiers().getObjCLifetime();
}
}
if (LangOpts.CPlusPlus && D.getDeclSpec().hasTagDefinition()) {
// C++ [dcl.fct]p6:
// Types shall not be defined in return or parameter types.
TagDecl *Tag = cast<TagDecl>(D.getDeclSpec().getRepAsDecl());
S.Diag(Tag->getLocation(), diag::err_type_defined_in_result_type)
<< Context.getTypeDeclType(Tag);
}
// Exception specs are not allowed in typedefs. Complain, but add it
// anyway.
if (IsTypedefName && FTI.getExceptionSpecType() && !LangOpts.CPlusPlus17)
S.Diag(FTI.getExceptionSpecLocBeg(),
diag::err_exception_spec_in_typedef)
<< (D.getContext() == DeclaratorContext::AliasDecl ||
D.getContext() == DeclaratorContext::AliasTemplate);
// If we see "T var();" or "T var(T());" at block scope, it is probably
// an attempt to initialize a variable, not a function declaration.
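// e.g. at block scope, 'std::string s();' declares a function taking no
// arguments rather than a default-initialized variable (the so-called
// "most vexing parse").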
if (FTI.isAmbiguous)
warnAboutAmbiguousFunction(S, D, DeclType, T);
FunctionType::ExtInfo EI(
getCCForDeclaratorChunk(S, D, DeclType.getAttrs(), FTI, chunkIndex));
// OpenCL disallows functions without a prototype, but it doesn't enforce
// strict prototypes as in C2x because it allows a function definition to
// have an identifier list. See OpenCL 3.0 6.11/g for more details.
if (!FTI.NumParams && !FTI.isVariadic &&
!LangOpts.requiresStrictPrototypes() && !LangOpts.OpenCL) {
// Simple void foo(), where the incoming T is the result type.
T = Context.getFunctionNoProtoType(T, EI);
} else {
// We allow a zero-parameter variadic function in C if the
// function is marked with the "overloadable" attribute. Scan
// for this attribute now.
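// e.g. 'void f(...) __attribute__((overloadable));' is accepted in C,
// whereas plain 'void f(...);' requires a named parameter before the
// ellipsis.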
if (!FTI.NumParams && FTI.isVariadic && !LangOpts.CPlusPlus)
if (!D.getDeclarationAttributes().hasAttribute(
ParsedAttr::AT_Overloadable) &&
!D.getAttributes().hasAttribute(ParsedAttr::AT_Overloadable) &&
!D.getDeclSpec().getAttributes().hasAttribute(
ParsedAttr::AT_Overloadable))
S.Diag(FTI.getEllipsisLoc(), diag::err_ellipsis_first_param);
if (FTI.NumParams && FTI.Params[0].Param == nullptr) {
// C99 6.7.5.3p3: Reject int(x,y,z) when it's not a function
// definition.
S.Diag(FTI.Params[0].IdentLoc,
diag::err_ident_list_in_fn_declaration);
D.setInvalidType(true);
// Recover by creating a K&R-style function type, if possible.
T = (!LangOpts.requiresStrictPrototypes() && !LangOpts.OpenCL)
? Context.getFunctionNoProtoType(T, EI)
: Context.IntTy;
break;
}
FunctionProtoType::ExtProtoInfo EPI;
EPI.ExtInfo = EI;
EPI.Variadic = FTI.isVariadic;
EPI.EllipsisLoc = FTI.getEllipsisLoc();
EPI.HasTrailingReturn = FTI.hasTrailingReturnType();
EPI.TypeQuals.addCVRUQualifiers(
FTI.MethodQualifiers ? FTI.MethodQualifiers->getTypeQualifiers()
: 0);
EPI.RefQualifier = !FTI.hasRefQualifier()? RQ_None
: FTI.RefQualifierIsLValueRef? RQ_LValue
: RQ_RValue;
// Otherwise, we have a function with a parameter list that is
// potentially variadic.
SmallVector<QualType, 16> ParamTys;
ParamTys.reserve(FTI.NumParams);
SmallVector<FunctionProtoType::ExtParameterInfo, 16>
ExtParameterInfos(FTI.NumParams);
bool HasAnyInterestingExtParameterInfos = false;
for (unsigned i = 0, e = FTI.NumParams; i != e; ++i) {
ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[i].Param);
QualType ParamTy = Param->getType();
assert(!ParamTy.isNull() && "Couldn't parse type?");
// Look for 'void'. void is allowed only as a single parameter to a
// function with no other parameters (C99 6.7.5.3p10). We record
// int(void) as a FunctionProtoType with an empty parameter list.
if (ParamTy->isVoidType()) {
// If this is something like 'float(int, void)', reject it. 'void'
// is an incomplete type (C99 6.2.5p19) and function decls cannot
// have parameters of incomplete type.
if (FTI.NumParams != 1 || FTI.isVariadic) {
S.Diag(FTI.Params[i].IdentLoc, diag::err_void_only_param);
ParamTy = Context.IntTy;
Param->setType(ParamTy);
} else if (FTI.Params[i].Ident) {
// Reject, but continue to parse 'int(void abc)'.
S.Diag(FTI.Params[i].IdentLoc, diag::err_param_with_void_type);
ParamTy = Context.IntTy;
Param->setType(ParamTy);
} else {
// Reject, but continue to parse 'float(const void)'.
if (ParamTy.hasQualifiers())
S.Diag(DeclType.Loc, diag::err_void_param_qualified);
// Do not add 'void' to the list.
break;
}
} else if (ParamTy->isHalfType()) {
// Disallow half FP parameters.
// FIXME: This really should be in BuildFunctionType.
if (S.getLangOpts().OpenCL) {
if (!S.getOpenCLOptions().isAvailableOption("cl_khr_fp16",
S.getLangOpts())) {
S.Diag(Param->getLocation(), diag::err_opencl_invalid_param)
<< ParamTy << 0;
D.setInvalidType();
Param->setInvalidDecl();
}
} else if (!S.getLangOpts().HalfArgsAndReturns) {
S.Diag(Param->getLocation(),
diag::err_parameters_retval_cannot_have_fp16_type) << 0;
D.setInvalidType();
}
} else if (!FTI.hasPrototype) {
if (ParamTy->isPromotableIntegerType()) {
ParamTy = Context.getPromotedIntegerType(ParamTy);
Param->setKNRPromoted(true);
} else if (const BuiltinType* BTy = ParamTy->getAs<BuiltinType>()) {
if (BTy->getKind() == BuiltinType::Float) {
ParamTy = Context.DoubleTy;
Param->setKNRPromoted(true);
}
}
} else if (S.getLangOpts().OpenCL && ParamTy->isBlockPointerType()) {
// OpenCL 2.0 s6.12.5: A block cannot be a parameter of a function.
S.Diag(Param->getLocation(), diag::err_opencl_invalid_param)
<< ParamTy << 1 /*hint off*/;
D.setInvalidType();
}
if (LangOpts.ObjCAutoRefCount && Param->hasAttr<NSConsumedAttr>()) {
ExtParameterInfos[i] = ExtParameterInfos[i].withIsConsumed(true);
HasAnyInterestingExtParameterInfos = true;
}
if (auto attr = Param->getAttr<ParameterABIAttr>()) {
ExtParameterInfos[i] =
ExtParameterInfos[i].withABI(attr->getABI());
HasAnyInterestingExtParameterInfos = true;
}
if (Param->hasAttr<PassObjectSizeAttr>()) {
ExtParameterInfos[i] = ExtParameterInfos[i].withHasPassObjectSize();
HasAnyInterestingExtParameterInfos = true;
}
if (Param->hasAttr<NoEscapeAttr>()) {
ExtParameterInfos[i] = ExtParameterInfos[i].withIsNoEscape(true);
HasAnyInterestingExtParameterInfos = true;
}
ParamTys.push_back(ParamTy);
}
if (HasAnyInterestingExtParameterInfos) {
EPI.ExtParameterInfos = ExtParameterInfos.data();
checkExtParameterInfos(S, ParamTys, EPI,
[&](unsigned i) { return FTI.Params[i].Param->getLocation(); });
}
SmallVector<QualType, 4> Exceptions;
SmallVector<ParsedType, 2> DynamicExceptions;
SmallVector<SourceRange, 2> DynamicExceptionRanges;
Expr *NoexceptExpr = nullptr;
if (FTI.getExceptionSpecType() == EST_Dynamic) {
// FIXME: It's rather inefficient to have to split into two vectors
// here.
unsigned N = FTI.getNumExceptions();
DynamicExceptions.reserve(N);
DynamicExceptionRanges.reserve(N);
for (unsigned I = 0; I != N; ++I) {
DynamicExceptions.push_back(FTI.Exceptions[I].Ty);
DynamicExceptionRanges.push_back(FTI.Exceptions[I].Range);
}
} else if (isComputedNoexcept(FTI.getExceptionSpecType())) {
NoexceptExpr = FTI.NoexceptExpr;
}
S.checkExceptionSpecification(D.isFunctionDeclarationContext(),
FTI.getExceptionSpecType(),
DynamicExceptions,
DynamicExceptionRanges,
NoexceptExpr,
Exceptions,
EPI.ExceptionSpec);
// FIXME: Set address space from attrs for C++ mode here.
// OpenCLCPlusPlus: A class member function has an address space.
auto IsClassMember = [&]() {
return (!state.getDeclarator().getCXXScopeSpec().isEmpty() &&
state.getDeclarator()
.getCXXScopeSpec()
.getScopeRep()
->getKind() == NestedNameSpecifier::TypeSpec) ||
state.getDeclarator().getContext() ==
DeclaratorContext::Member ||
state.getDeclarator().getContext() ==
DeclaratorContext::LambdaExpr;
};
if (state.getSema().getLangOpts().OpenCLCPlusPlus && IsClassMember()) {
LangAS ASIdx = LangAS::Default;
// Take the address space attribute, if any, and mark it as invalid to
// avoid adding it again later while creating the QualType.
if (FTI.MethodQualifiers)
for (ParsedAttr &attr : FTI.MethodQualifiers->getAttributes()) {
LangAS ASIdxNew = attr.asOpenCLLangAS();
if (DiagnoseMultipleAddrSpaceAttributes(S, ASIdx, ASIdxNew,
attr.getLoc()))
D.setInvalidType(true);
else
ASIdx = ASIdxNew;
}
// If a class member function's address space is not set, set it to
// __generic.
LangAS AS =
(ASIdx == LangAS::Default ? S.getDefaultCXXMethodAddrSpace()
: ASIdx);
EPI.TypeQuals.addAddressSpace(AS);
}
T = Context.getFunctionType(T, ParamTys, EPI);
}
break;
}
case DeclaratorChunk::MemberPointer: {
// The scope spec must refer to a class, or be dependent.
CXXScopeSpec &SS = DeclType.Mem.Scope();
QualType ClsType;
// Handle pointer nullability.
inferPointerNullability(SimplePointerKind::MemberPointer, DeclType.Loc,
DeclType.EndLoc, DeclType.getAttrs(),
state.getDeclarator().getAttributePool());
if (SS.isInvalid()) {
// Avoid emitting extra errors if we already errored on the scope.
D.setInvalidType(true);
} else if (S.isDependentScopeSpecifier(SS) ||
isa_and_nonnull<CXXRecordDecl>(S.computeDeclContext(SS))) {
NestedNameSpecifier *NNS = SS.getScopeRep();
NestedNameSpecifier *NNSPrefix = NNS->getPrefix();
switch (NNS->getKind()) {
case NestedNameSpecifier::Identifier:
ClsType = Context.getDependentNameType(ETK_None, NNSPrefix,
NNS->getAsIdentifier());
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
llvm_unreachable("Nested-name-specifier must name a type");
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
ClsType = QualType(NNS->getAsType(), 0);
// Note: if the NNS has a prefix and ClsType is a nondependent
// TemplateSpecializationType, then the NNS prefix is NOT included
// in ClsType; hence we wrap ClsType into an ElaboratedType.
// NOTE: in particular, no wrap occurs if ClsType already is an
// Elaborated, DependentName, or DependentTemplateSpecialization.
if (NNSPrefix && isa<TemplateSpecializationType>(NNS->getAsType()))
ClsType = Context.getElaboratedType(ETK_None, NNSPrefix, ClsType);
break;
}
} else {
S.Diag(DeclType.Mem.Scope().getBeginLoc(),
diag::err_illegal_decl_mempointer_in_nonclass)
<< (D.getIdentifier() ? D.getIdentifier()->getName() : "type name")
<< DeclType.Mem.Scope().getRange();
D.setInvalidType(true);
}
if (!ClsType.isNull())
T = S.BuildMemberPointerType(T, ClsType, DeclType.Loc,
D.getIdentifier());
if (T.isNull()) {
T = Context.IntTy;
D.setInvalidType(true);
} else if (DeclType.Mem.TypeQuals) {
T = S.BuildQualifiedType(T, DeclType.Loc, DeclType.Mem.TypeQuals);
}
break;
}
case DeclaratorChunk::Pipe: {
T = S.BuildReadPipeType(T, DeclType.Loc);
processTypeAttrs(state, T, TAL_DeclSpec,
D.getMutableDeclSpec().getAttributes());
break;
}
}
if (T.isNull()) {
D.setInvalidType(true);
T = Context.IntTy;
}
// See if there are any attributes on this declarator chunk.
processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs());
if (DeclType.Kind != DeclaratorChunk::Paren) {
if (ExpectNoDerefChunk && !IsNoDerefableChunk(DeclType))
S.Diag(DeclType.Loc, diag::warn_noderef_on_non_pointer_or_array);
ExpectNoDerefChunk = state.didParseNoDeref();
}
}
if (ExpectNoDerefChunk)
S.Diag(state.getDeclarator().getBeginLoc(),
diag::warn_noderef_on_non_pointer_or_array);
// GNU warning -Wstrict-prototypes
// Warn if a function declaration or definition lacks a prototype.
// This warning is issued for all kinds of unprototyped function
// declarations (e.g. function-type typedefs, function pointers, etc.).
// C99 6.7.5.3p14:
// The empty list in a function declarator that is not part of a definition
// of that function specifies that no information about the number or types
// of the parameters is supplied.
// See ActOnFinishFunctionBody() and MergeFunctionDecl() for handling of
// function declarations whose behavior changes in C2x.
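// e.g. 'void f();' triggers -Wstrict-prototypes, and the fix-it inserts
// 'void' to produce the prototyped form 'void f(void);'.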
if (!LangOpts.requiresStrictPrototypes()) {
bool IsBlock = false;
for (const DeclaratorChunk &DeclType : D.type_objects()) {
switch (DeclType.Kind) {
case DeclaratorChunk::BlockPointer:
IsBlock = true;
break;
case DeclaratorChunk::Function: {
const DeclaratorChunk::FunctionTypeInfo &FTI = DeclType.Fun;
// We suppress the warning when there's no LParen location, as this
// indicates the declaration was an implicit declaration, which gets
// warned about separately via -Wimplicit-function-declaration. We also
// suppress the warning when we know the function has a prototype.
if (!FTI.hasPrototype && FTI.NumParams == 0 && !FTI.isVariadic &&
FTI.getLParenLoc().isValid())
S.Diag(DeclType.Loc, diag::warn_strict_prototypes)
<< IsBlock
<< FixItHint::CreateInsertion(FTI.getRParenLoc(), "void");
IsBlock = false;
break;
}
default:
break;
}
}
}
assert(!T.isNull() && "T must not be null after this point");
if (LangOpts.CPlusPlus && T->isFunctionType()) {
const FunctionProtoType *FnTy = T->getAs<FunctionProtoType>();
assert(FnTy && "Why oh why is there not a FunctionProtoType here?");
// C++ 8.3.5p4:
// A cv-qualifier-seq shall only be part of the function type
// for a nonstatic member function, the function type to which a pointer
// to member refers, or the top-level function type of a function typedef
// declaration.
//
// Core issue 547 also allows cv-qualifiers on function types that are
// top-level template type arguments.
enum { NonMember, Member, DeductionGuide } Kind = NonMember;
if (D.getName().getKind() == UnqualifiedIdKind::IK_DeductionGuideName)
Kind = DeductionGuide;
else if (!D.getCXXScopeSpec().isSet()) {
if ((D.getContext() == DeclaratorContext::Member ||
D.getContext() == DeclaratorContext::LambdaExpr) &&
!D.getDeclSpec().isFriendSpecified())
Kind = Member;
} else {
DeclContext *DC = S.computeDeclContext(D.getCXXScopeSpec());
if (!DC || DC->isRecord())
Kind = Member;
}
// C++11 [dcl.fct]p6 (w/DR1417):
// An attempt to specify a function type with a cv-qualifier-seq or a
// ref-qualifier (including by typedef-name) is ill-formed unless it is:
// - the function type for a non-static member function,
// - the function type to which a pointer to member refers,
// - the top-level function type of a function typedef declaration or
// alias-declaration,
// - the type-id in the default argument of a type-parameter, or
// - the type-id of a template-argument for a type-parameter
//
// FIXME: Checking this here is insufficient. We accept-invalid on:
//
// template<typename T> struct S { void f(T); };
// S<int() const> s;
//
// ... for instance.
if (IsQualifiedFunction &&
!(Kind == Member &&
D.getDeclSpec().getStorageClassSpec() != DeclSpec::SCS_static) &&
!IsTypedefName && D.getContext() != DeclaratorContext::TemplateArg &&
D.getContext() != DeclaratorContext::TemplateTypeArg) {
SourceLocation Loc = D.getBeginLoc();
SourceRange RemovalRange;
unsigned I;
if (D.isFunctionDeclarator(I)) {
SmallVector<SourceLocation, 4> RemovalLocs;
const DeclaratorChunk &Chunk = D.getTypeObject(I);
assert(Chunk.Kind == DeclaratorChunk::Function);
if (Chunk.Fun.hasRefQualifier())
RemovalLocs.push_back(Chunk.Fun.getRefQualifierLoc());
if (Chunk.Fun.hasMethodTypeQualifiers())
Chunk.Fun.MethodQualifiers->forEachQualifier(
[&](DeclSpec::TQ TypeQual, StringRef QualName,
SourceLocation SL) { RemovalLocs.push_back(SL); });
if (!RemovalLocs.empty()) {
llvm::sort(RemovalLocs,
BeforeThanCompare<SourceLocation>(S.getSourceManager()));
RemovalRange = SourceRange(RemovalLocs.front(), RemovalLocs.back());
Loc = RemovalLocs.front();
}
}
S.Diag(Loc, diag::err_invalid_qualified_function_type)
<< Kind << D.isFunctionDeclarator() << T
<< getFunctionQualifiersAsString(FnTy)
<< FixItHint::CreateRemoval(RemovalRange);
// Strip the cv-qualifiers and ref-qualifiers from the type.
FunctionProtoType::ExtProtoInfo EPI = FnTy->getExtProtoInfo();
EPI.TypeQuals.removeCVRQualifiers();
EPI.RefQualifier = RQ_None;
T = Context.getFunctionType(FnTy->getReturnType(), FnTy->getParamTypes(),
EPI);
// Rebuild any parens around the identifier in the function type.
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
if (D.getTypeObject(i).Kind != DeclaratorChunk::Paren)
break;
T = S.BuildParenType(T);
}
}
}
// Apply any undistributed attributes from the declaration or declarator.
ParsedAttributesView NonSlidingAttrs;
for (ParsedAttr &AL : D.getDeclarationAttributes()) {
if (!AL.slidesFromDeclToDeclSpecLegacyBehavior()) {
NonSlidingAttrs.addAtEnd(&AL);
}
}
processTypeAttrs(state, T, TAL_DeclName, NonSlidingAttrs);
processTypeAttrs(state, T, TAL_DeclName, D.getAttributes());
// Diagnose any ignored type attributes.
state.diagnoseIgnoredTypeAttrs(T);
// C++0x [dcl.constexpr]p9:
// A constexpr specifier used in an object declaration declares the object
// as const.
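// e.g. 'constexpr int x = 3;' declares 'x' with type 'const int'.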
if (D.getDeclSpec().getConstexprSpecifier() == ConstexprSpecKind::Constexpr &&
T->isObjectType())
T.addConst();
// C++2a [dcl.fct]p4:
// A parameter with volatile-qualified type is deprecated
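// e.g. 'void f(volatile int v);' warns in C++20 that the volatile
// parameter type is deprecated.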
if (T.isVolatileQualified() && S.getLangOpts().CPlusPlus20 &&
(D.getContext() == DeclaratorContext::Prototype ||
D.getContext() == DeclaratorContext::LambdaExprParameter))
S.Diag(D.getIdentifierLoc(), diag::warn_deprecated_volatile_param) << T;
// If there was an ellipsis in the declarator, the declaration declares a
// parameter pack whose type may be a pack expansion type.
if (D.hasEllipsis()) {
// C++0x [dcl.fct]p13:
// A declarator-id or abstract-declarator containing an ellipsis shall
// only be used in a parameter-declaration. Such a parameter-declaration
// is a parameter pack (14.5.3). [...]
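// e.g. 'template<typename ...Ts> void f(Ts ...args);' declares 'args' as a
// function parameter pack whose type 'Ts...' is a pack expansion.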
switch (D.getContext()) {
case DeclaratorContext::Prototype:
case DeclaratorContext::LambdaExprParameter:
case DeclaratorContext::RequiresExpr:
// C++0x [dcl.fct]p13:
// [...] When it is part of a parameter-declaration-clause, the
// parameter pack is a function parameter pack (14.5.3). The type T
// of the declarator-id of the function parameter pack shall contain
// a template parameter pack; each template parameter pack in T is
// expanded by the function parameter pack.
//
// We represent function parameter packs as function parameters whose
// type is a pack expansion.
if (!T->containsUnexpandedParameterPack() &&
(!LangOpts.CPlusPlus20 || !T->getContainedAutoType())) {
S.Diag(D.getEllipsisLoc(),
diag::err_function_parameter_pack_without_parameter_packs)
<< T << D.getSourceRange();
D.setEllipsisLoc(SourceLocation());
} else {
T = Context.getPackExpansionType(T, None, /*ExpectPackInType=*/false);
}
break;
case DeclaratorContext::TemplateParam:
// C++0x [temp.param]p15:
// If a template-parameter is a [...] is a parameter-declaration that
// declares a parameter pack (8.3.5), then the template-parameter is a
// template parameter pack (14.5.3).
//
// Note: core issue 778 clarifies that, if there are any unexpanded
// parameter packs in the type of the non-type template parameter, then
// it expands those parameter packs.
if (T->containsUnexpandedParameterPack())
T = Context.getPackExpansionType(T, None);
else
S.Diag(D.getEllipsisLoc(),
LangOpts.CPlusPlus11
? diag::warn_cxx98_compat_variadic_templates
: diag::ext_variadic_templates);
break;
case DeclaratorContext::File:
case DeclaratorContext::KNRTypeList:
case DeclaratorContext::ObjCParameter: // FIXME: special diagnostic here?
case DeclaratorContext::ObjCResult: // FIXME: special diagnostic here?
case DeclaratorContext::TypeName:
case DeclaratorContext::FunctionalCast:
case DeclaratorContext::CXXNew:
case DeclaratorContext::AliasDecl:
case DeclaratorContext::AliasTemplate:
case DeclaratorContext::Member:
case DeclaratorContext::Block:
case DeclaratorContext::ForInit:
case DeclaratorContext::SelectionInit:
case DeclaratorContext::Condition:
case DeclaratorContext::CXXCatch:
case DeclaratorContext::ObjCCatch:
case DeclaratorContext::BlockLiteral:
case DeclaratorContext::LambdaExpr:
case DeclaratorContext::ConversionId:
case DeclaratorContext::TrailingReturn:
case DeclaratorContext::TrailingReturnVar:
case DeclaratorContext::TemplateArg:
case DeclaratorContext::TemplateTypeArg:
case DeclaratorContext::Association:
// FIXME: We may want to allow parameter packs in block-literal contexts
// in the future.
S.Diag(D.getEllipsisLoc(),
diag::err_ellipsis_in_declarator_not_parameter);
D.setEllipsisLoc(SourceLocation());
break;
}
}
assert(!T.isNull() && "T must not be null at the end of this function");
if (D.isInvalidType())
return Context.getTrivialTypeSourceInfo(T);
return GetTypeSourceInfoForDeclarator(state, T, TInfo);
}
/// GetTypeForDeclarator - Convert the type for the specified
/// declarator to Type instances.
///
/// The result of this call will never be null, but the associated
/// type may be a null type if there's an unrecoverable error.
TypeSourceInfo *Sema::GetTypeForDeclarator(Declarator &D, Scope *S) {
// Determine the type of the declarator. Not all forms of declarator
// have a type.
TypeProcessingState state(*this, D);
TypeSourceInfo *ReturnTypeInfo = nullptr;
QualType T = GetDeclSpecTypeForDeclarator(state, ReturnTypeInfo);
if (D.isPrototypeContext() && getLangOpts().ObjCAutoRefCount)
inferARCWriteback(state, T);
return GetFullTypeForDeclarator(state, T, ReturnTypeInfo);
}
static void transferARCOwnershipToDeclSpec(Sema &S,
QualType &declSpecTy,
Qualifiers::ObjCLifetime ownership) {
if (declSpecTy->isObjCRetainableType() &&
declSpecTy.getObjCLifetime() == Qualifiers::OCL_None) {
Qualifiers qs;
qs.addObjCLifetime(ownership);
declSpecTy = S.Context.getQualifiedType(declSpecTy, qs);
}
}
static void transferARCOwnershipToDeclaratorChunk(TypeProcessingState &state,
Qualifiers::ObjCLifetime ownership,
unsigned chunkIndex) {
Sema &S = state.getSema();
Declarator &D = state.getDeclarator();
// Look for an explicit lifetime attribute.
DeclaratorChunk &chunk = D.getTypeObject(chunkIndex);
if (chunk.getAttrs().hasAttribute(ParsedAttr::AT_ObjCOwnership))
return;
const char *attrStr = nullptr;
switch (ownership) {
case Qualifiers::OCL_None: llvm_unreachable("no ownership!");
case Qualifiers::OCL_ExplicitNone: attrStr = "none"; break;
case Qualifiers::OCL_Strong: attrStr = "strong"; break;
case Qualifiers::OCL_Weak: attrStr = "weak"; break;
case Qualifiers::OCL_Autoreleasing: attrStr = "autoreleasing"; break;
}
IdentifierLoc *Arg = new (S.Context) IdentifierLoc;
Arg->Ident = &S.Context.Idents.get(attrStr);
Arg->Loc = SourceLocation();
ArgsUnion Args(Arg);
// If there wasn't one, add one (with an invalid source location
// so that we don't make an AttributedType for it).
ParsedAttr *attr = D.getAttributePool().create(
&S.Context.Idents.get("objc_ownership"), SourceLocation(),
/*scope*/ nullptr, SourceLocation(),
/*args*/ &Args, 1, ParsedAttr::AS_GNU);
chunk.getAttrs().addAtEnd(attr);
// TODO: mark whether we did this inference?
}
/// Used for transferring ownership in casts resulting in l-values.
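/// For instance (under ARC), casting '&strongVar' to '(NSString **)' should
/// transfer the operand's __strong lifetime onto the cast type's inner
/// pointer; this routine decides where that qualifier lands.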
static void transferARCOwnership(TypeProcessingState &state,
QualType &declSpecTy,
Qualifiers::ObjCLifetime ownership) {
Sema &S = state.getSema();
Declarator &D = state.getDeclarator();
int inner = -1;
bool hasIndirection = false;
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
DeclaratorChunk &chunk = D.getTypeObject(i);
switch (chunk.Kind) {
case DeclaratorChunk::Paren:
// Ignore parens.
break;
case DeclaratorChunk::Array:
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pointer:
if (inner != -1)
hasIndirection = true;
inner = i;
break;
case DeclaratorChunk::BlockPointer:
if (inner != -1)
transferARCOwnershipToDeclaratorChunk(state, ownership, i);
return;
case DeclaratorChunk::Function:
case DeclaratorChunk::MemberPointer:
case DeclaratorChunk::Pipe:
return;
}
}
if (inner == -1)
return;
DeclaratorChunk &chunk = D.getTypeObject(inner);
if (chunk.Kind == DeclaratorChunk::Pointer) {
if (declSpecTy->isObjCRetainableType())
return transferARCOwnershipToDeclSpec(S, declSpecTy, ownership);
if (declSpecTy->isObjCObjectType() && hasIndirection)
return transferARCOwnershipToDeclaratorChunk(state, ownership, inner);
} else {
assert(chunk.Kind == DeclaratorChunk::Array ||
chunk.Kind == DeclaratorChunk::Reference);
return transferARCOwnershipToDeclSpec(S, declSpecTy, ownership);
}
}
TypeSourceInfo *Sema::GetTypeForDeclaratorCast(Declarator &D, QualType FromTy) {
TypeProcessingState state(*this, D);
TypeSourceInfo *ReturnTypeInfo = nullptr;
QualType declSpecTy = GetDeclSpecTypeForDeclarator(state, ReturnTypeInfo);
if (getLangOpts().ObjC) {
Qualifiers::ObjCLifetime ownership = Context.getInnerObjCOwnership(FromTy);
if (ownership != Qualifiers::OCL_None)
transferARCOwnership(state, declSpecTy, ownership);
}
return GetFullTypeForDeclarator(state, declSpecTy, ReturnTypeInfo);
}
static void fillAttributedTypeLoc(AttributedTypeLoc TL,
TypeProcessingState &State) {
TL.setAttr(State.takeAttrForAttributedType(TL.getTypePtr()));
}
namespace {
class TypeSpecLocFiller : public TypeLocVisitor<TypeSpecLocFiller> {
Sema &SemaRef;
ASTContext &Context;
TypeProcessingState &State;
const DeclSpec &DS;
public:
TypeSpecLocFiller(Sema &S, ASTContext &Context, TypeProcessingState &State,
const DeclSpec &DS)
: SemaRef(S), Context(Context), State(State), DS(DS) {}
void VisitAttributedTypeLoc(AttributedTypeLoc TL) {
Visit(TL.getModifiedLoc());
fillAttributedTypeLoc(TL, State);
}
void VisitBTFTagAttributedTypeLoc(BTFTagAttributedTypeLoc TL) {
Visit(TL.getWrappedLoc());
}
void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
Visit(TL.getInnerLoc());
TL.setExpansionLoc(
State.getExpansionLocForMacroQualifiedType(TL.getTypePtr()));
}
void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
Visit(TL.getUnqualifiedLoc());
}
// Allows filling in the pointee's type locations, e.g.,
// int __attr * __attr * __attr *p;
void VisitPointerTypeLoc(PointerTypeLoc TL) { Visit(TL.getNextTypeLoc()); }
void VisitTypedefTypeLoc(TypedefTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
// FIXME: We should have DS.getTypeSpecTypeEndLoc(), but that requires an
// additional field. What we have is good enough for displaying the
// location of the fix-it on the interface name.
TL.setNameEndLoc(DS.getEndLoc());
}
void VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) {
TypeSourceInfo *RepTInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &RepTInfo);
TL.copy(RepTInfo->getTypeLoc());
}
void VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
TypeSourceInfo *RepTInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &RepTInfo);
TL.copy(RepTInfo->getTypeLoc());
}
void VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) {
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
// If we got no declarator info from previous Sema routines,
// just fill with the typespec loc.
if (!TInfo) {
TL.initialize(Context, DS.getTypeSpecTypeNameLoc());
return;
}
TypeLoc OldTL = TInfo->getTypeLoc();
if (TInfo->getType()->getAs<ElaboratedType>()) {
ElaboratedTypeLoc ElabTL = OldTL.castAs<ElaboratedTypeLoc>();
TemplateSpecializationTypeLoc NamedTL = ElabTL.getNamedTypeLoc()
.castAs<TemplateSpecializationTypeLoc>();
TL.copy(NamedTL);
} else {
TL.copy(OldTL.castAs<TemplateSpecializationTypeLoc>());
assert(TL.getRAngleLoc() == OldTL.castAs<TemplateSpecializationTypeLoc>().getRAngleLoc());
}
}
void VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) {
assert(DS.getTypeSpecType() == DeclSpec::TST_typeofExpr);
TL.setTypeofLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
}
void VisitTypeOfTypeLoc(TypeOfTypeLoc TL) {
assert(DS.getTypeSpecType() == DeclSpec::TST_typeofType);
TL.setTypeofLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
assert(DS.getRepAsType());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.setUnderlyingTInfo(TInfo);
}
void VisitDecltypeTypeLoc(DecltypeTypeLoc TL) {
assert(DS.getTypeSpecType() == DeclSpec::TST_decltype);
TL.setDecltypeLoc(DS.getTypeSpecTypeLoc());
TL.setRParenLoc(DS.getTypeofParensRange().getEnd());
}
void VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) {
// FIXME: This holds only because we only have one unary transform.
assert(DS.getTypeSpecType() == DeclSpec::TST_underlyingType);
TL.setKWLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
assert(DS.getRepAsType());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.setUnderlyingTInfo(TInfo);
}
void VisitBuiltinTypeLoc(BuiltinTypeLoc TL) {
// By default, use the source location of the type specifier.
TL.setBuiltinLoc(DS.getTypeSpecTypeLoc());
if (TL.needsExtraLocalData()) {
// Set info for the written builtin specifiers.
TL.getWrittenBuiltinSpecs() = DS.getWrittenBuiltinSpecs();
// Try to have a meaningful source location.
if (TL.getWrittenSignSpec() != TypeSpecifierSign::Unspecified)
TL.expandBuiltinRange(DS.getTypeSpecSignLoc());
if (TL.getWrittenWidthSpec() != TypeSpecifierWidth::Unspecified)
TL.expandBuiltinRange(DS.getTypeSpecWidthRange());
}
}
void VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) {
ElaboratedTypeKeyword Keyword
= TypeWithKeyword::getKeywordForTypeSpec(DS.getTypeSpecType());
if (DS.getTypeSpecType() == TST_typename) {
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
if (TInfo) {
TL.copy(TInfo->getTypeLoc().castAs<ElaboratedTypeLoc>());
return;
}
}
TL.setElaboratedKeywordLoc(Keyword != ETK_None
? DS.getTypeSpecTypeLoc()
: SourceLocation());
const CXXScopeSpec& SS = DS.getTypeSpecScope();
TL.setQualifierLoc(SS.getWithLocInContext(Context));
Visit(TL.getNextTypeLoc().getUnqualifiedLoc());
}
void VisitDependentNameTypeLoc(DependentNameTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_typename);
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
assert(TInfo);
TL.copy(TInfo->getTypeLoc().castAs<DependentNameTypeLoc>());
}
void VisitDependentTemplateSpecializationTypeLoc(
DependentTemplateSpecializationTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_typename);
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
assert(TInfo);
TL.copy(
TInfo->getTypeLoc().castAs<DependentTemplateSpecializationTypeLoc>());
}
void VisitAutoTypeLoc(AutoTypeLoc TL) {
assert(DS.getTypeSpecType() == TST_auto ||
DS.getTypeSpecType() == TST_decltype_auto ||
DS.getTypeSpecType() == TST_auto_type ||
DS.getTypeSpecType() == TST_unspecified);
TL.setNameLoc(DS.getTypeSpecTypeLoc());
if (DS.getTypeSpecType() == TST_decltype_auto)
TL.setRParenLoc(DS.getTypeofParensRange().getEnd());
if (!DS.isConstrainedAuto())
return;
TemplateIdAnnotation *TemplateId = DS.getRepAsTemplateId();
if (!TemplateId)
return;
if (DS.getTypeSpecScope().isNotEmpty())
TL.setNestedNameSpecifierLoc(
DS.getTypeSpecScope().getWithLocInContext(Context));
else
TL.setNestedNameSpecifierLoc(NestedNameSpecifierLoc());
TL.setTemplateKWLoc(TemplateId->TemplateKWLoc);
TL.setConceptNameLoc(TemplateId->TemplateNameLoc);
TL.setFoundDecl(nullptr);
TL.setLAngleLoc(TemplateId->LAngleLoc);
TL.setRAngleLoc(TemplateId->RAngleLoc);
if (TemplateId->NumArgs == 0)
return;
TemplateArgumentListInfo TemplateArgsInfo;
ASTTemplateArgsPtr TemplateArgsPtr(TemplateId->getTemplateArgs(),
TemplateId->NumArgs);
SemaRef.translateTemplateArguments(TemplateArgsPtr, TemplateArgsInfo);
for (unsigned I = 0; I < TemplateId->NumArgs; ++I)
TL.setArgLocInfo(I, TemplateArgsInfo.arguments()[I].getLocInfo());
}
void VisitTagTypeLoc(TagTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeNameLoc());
}
void VisitAtomicTypeLoc(AtomicTypeLoc TL) {
// An AtomicTypeLoc can come from either an _Atomic(...) type specifier
// or an _Atomic qualifier.
if (DS.getTypeSpecType() == DeclSpec::TST_atomic) {
TL.setKWLoc(DS.getTypeSpecTypeLoc());
TL.setParensRange(DS.getTypeofParensRange());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
assert(TInfo);
TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc());
} else {
TL.setKWLoc(DS.getAtomicSpecLoc());
// No parens, to indicate this was spelled as an _Atomic qualifier.
TL.setParensRange(SourceRange());
Visit(TL.getValueLoc());
}
}
void VisitPipeTypeLoc(PipeTypeLoc TL) {
TL.setKWLoc(DS.getTypeSpecTypeLoc());
TypeSourceInfo *TInfo = nullptr;
Sema::GetTypeFromParser(DS.getRepAsType(), &TInfo);
TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc());
}
void VisitExtIntTypeLoc(BitIntTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitDependentExtIntTypeLoc(DependentBitIntTypeLoc TL) {
TL.setNameLoc(DS.getTypeSpecTypeLoc());
}
void VisitTypeLoc(TypeLoc TL) {
// FIXME: add other typespec types and change this to an assert.
TL.initialize(Context, DS.getTypeSpecTypeLoc());
}
};
class DeclaratorLocFiller : public TypeLocVisitor<DeclaratorLocFiller> {
ASTContext &Context;
TypeProcessingState &State;
const DeclaratorChunk &Chunk;
public:
DeclaratorLocFiller(ASTContext &Context, TypeProcessingState &State,
const DeclaratorChunk &Chunk)
: Context(Context), State(State), Chunk(Chunk) {}
void VisitQualifiedTypeLoc(QualifiedTypeLoc TL) {
llvm_unreachable("qualified type locs not expected here!");
}
void VisitDecayedTypeLoc(DecayedTypeLoc TL) {
llvm_unreachable("decayed type locs not expected here!");
}
void VisitAttributedTypeLoc(AttributedTypeLoc TL) {
fillAttributedTypeLoc(TL, State);
}
void VisitBTFTagAttributedTypeLoc(BTFTagAttributedTypeLoc TL) {
// nothing
}
void VisitAdjustedTypeLoc(AdjustedTypeLoc TL) {
// nothing
}
void VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::BlockPointer);
TL.setCaretLoc(Chunk.Loc);
}
void VisitPointerTypeLoc(PointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Pointer);
TL.setStarLoc(Chunk.Loc);
}
void VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Pointer);
TL.setStarLoc(Chunk.Loc);
}
void VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::MemberPointer);
const CXXScopeSpec& SS = Chunk.Mem.Scope();
NestedNameSpecifierLoc NNSLoc = SS.getWithLocInContext(Context);
const Type* ClsTy = TL.getClass();
QualType ClsQT = QualType(ClsTy, 0);
TypeSourceInfo *ClsTInfo = Context.CreateTypeSourceInfo(ClsQT, 0);
// Now copy source location info into the type loc component.
TypeLoc ClsTL = ClsTInfo->getTypeLoc();
switch (NNSLoc.getNestedNameSpecifier()->getKind()) {
case NestedNameSpecifier::Identifier:
assert(isa<DependentNameType>(ClsTy) && "Unexpected TypeLoc");
{
DependentNameTypeLoc DNTLoc = ClsTL.castAs<DependentNameTypeLoc>();
DNTLoc.setElaboratedKeywordLoc(SourceLocation());
DNTLoc.setQualifierLoc(NNSLoc.getPrefix());
DNTLoc.setNameLoc(NNSLoc.getLocalBeginLoc());
}
break;
case NestedNameSpecifier::TypeSpec:
case NestedNameSpecifier::TypeSpecWithTemplate:
if (isa<ElaboratedType>(ClsTy)) {
ElaboratedTypeLoc ETLoc = ClsTL.castAs<ElaboratedTypeLoc>();
ETLoc.setElaboratedKeywordLoc(SourceLocation());
ETLoc.setQualifierLoc(NNSLoc.getPrefix());
TypeLoc NamedTL = ETLoc.getNamedTypeLoc();
NamedTL.initializeFullCopy(NNSLoc.getTypeLoc());
} else {
ClsTL.initializeFullCopy(NNSLoc.getTypeLoc());
}
break;
case NestedNameSpecifier::Namespace:
case NestedNameSpecifier::NamespaceAlias:
case NestedNameSpecifier::Global:
case NestedNameSpecifier::Super:
llvm_unreachable("Nested-name-specifier must name a type");
}
// Finally fill in MemberPointerLocInfo fields.
TL.setStarLoc(Chunk.Mem.StarLoc);
TL.setClassTInfo(ClsTInfo);
}
void VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Reference);
// 'Amp' is misleading: this might originally have been
// spelled with AmpAmp.
TL.setAmpLoc(Chunk.Loc);
}
void VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Reference);
assert(!Chunk.Ref.LValueRef);
TL.setAmpAmpLoc(Chunk.Loc);
}
void VisitArrayTypeLoc(ArrayTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Array);
TL.setLBracketLoc(Chunk.Loc);
TL.setRBracketLoc(Chunk.EndLoc);
TL.setSizeExpr(static_cast<Expr*>(Chunk.Arr.NumElts));
}
void VisitFunctionTypeLoc(FunctionTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Function);
TL.setLocalRangeBegin(Chunk.Loc);
TL.setLocalRangeEnd(Chunk.EndLoc);
const DeclaratorChunk::FunctionTypeInfo &FTI = Chunk.Fun;
TL.setLParenLoc(FTI.getLParenLoc());
TL.setRParenLoc(FTI.getRParenLoc());
for (unsigned i = 0, e = TL.getNumParams(), tpi = 0; i != e; ++i) {
ParmVarDecl *Param = cast<ParmVarDecl>(FTI.Params[i].Param);
TL.setParam(tpi++, Param);
}
TL.setExceptionSpecRange(FTI.getExceptionSpecRange());
}
void VisitParenTypeLoc(ParenTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Paren);
TL.setLParenLoc(Chunk.Loc);
TL.setRParenLoc(Chunk.EndLoc);
}
void VisitPipeTypeLoc(PipeTypeLoc TL) {
assert(Chunk.Kind == DeclaratorChunk::Pipe);
TL.setKWLoc(Chunk.Loc);
}
void VisitBitIntTypeLoc(BitIntTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
TL.setExpansionLoc(Chunk.Loc);
}
void VisitVectorTypeLoc(VectorTypeLoc TL) { TL.setNameLoc(Chunk.Loc); }
void VisitDependentVectorTypeLoc(DependentVectorTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void
VisitDependentSizedExtVectorTypeLoc(DependentSizedExtVectorTypeLoc TL) {
TL.setNameLoc(Chunk.Loc);
}
void VisitTypeLoc(TypeLoc TL) {
llvm_unreachable("unsupported TypeLoc kind in declarator!");
}
};
} // end anonymous namespace
static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) {
SourceLocation Loc;
switch (Chunk.Kind) {
case DeclaratorChunk::Function:
case DeclaratorChunk::Array:
case DeclaratorChunk::Paren:
case DeclaratorChunk::Pipe:
llvm_unreachable("cannot be _Atomic qualified");
case DeclaratorChunk::Pointer:
Loc = Chunk.Ptr.AtomicQualLoc;
break;
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::Reference:
case DeclaratorChunk::MemberPointer:
// FIXME: Provide a source location for the _Atomic keyword.
break;
}
ATL.setKWLoc(Loc);
ATL.setParensRange(SourceRange());
}
static void
fillDependentAddressSpaceTypeLoc(DependentAddressSpaceTypeLoc DASTL,
const ParsedAttributesView &Attrs) {
for (const ParsedAttr &AL : Attrs) {
if (AL.getKind() == ParsedAttr::AT_AddressSpace) {
DASTL.setAttrNameLoc(AL.getLoc());
DASTL.setAttrExprOperand(AL.getArgAsExpr(0));
DASTL.setAttrOperandParensRange(SourceRange());
return;
}
}
llvm_unreachable(
"no address_space attribute found at the expected location!");
}
static void fillMatrixTypeLoc(MatrixTypeLoc MTL,
const ParsedAttributesView &Attrs) {
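// e.g. 'typedef float m4x4_t __attribute__((matrix_type(4, 4)));' supplies
// the row and column operands recorded below.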
for (const ParsedAttr &AL : Attrs) {
if (AL.getKind() == ParsedAttr::AT_MatrixType) {
MTL.setAttrNameLoc(AL.getLoc());
MTL.setAttrRowOperand(AL.getArgAsExpr(0));
MTL.setAttrColumnOperand(AL.getArgAsExpr(1));
MTL.setAttrOperandParensRange(SourceRange());
return;
}
}
llvm_unreachable("no matrix_type attribute found at the expected location!");
}
/// Create and instantiate a TypeSourceInfo with type source information.
///
/// \param T QualType referring to the type as written in source code.
///
/// \param ReturnTypeInfo For declarators whose return type does not show
/// up in the normal place in the declaration specifiers (such as a C++
/// conversion function), this pointer will refer to a type source information
/// for that return type.
static TypeSourceInfo *
GetTypeSourceInfoForDeclarator(TypeProcessingState &State,
QualType T, TypeSourceInfo *ReturnTypeInfo) {
Sema &S = State.getSema();
Declarator &D = State.getDeclarator();
TypeSourceInfo *TInfo = S.Context.CreateTypeSourceInfo(T);
UnqualTypeLoc CurrTL = TInfo->getTypeLoc().getUnqualifiedLoc();
// Handle parameter packs whose type is a pack expansion.
if (isa<PackExpansionType>(T)) {
CurrTL.castAs<PackExpansionTypeLoc>().setEllipsisLoc(D.getEllipsisLoc());
CurrTL = CurrTL.getNextTypeLoc().getUnqualifiedLoc();
}
for (unsigned i = 0, e = D.getNumTypeObjects(); i != e; ++i) {
// An AtomicTypeLoc might be produced by an atomic qualifier in this
// declarator chunk.
if (AtomicTypeLoc ATL = CurrTL.getAs<AtomicTypeLoc>()) {
fillAtomicQualLoc(ATL, D.getTypeObject(i));
CurrTL = ATL.getValueLoc().getUnqualifiedLoc();
}
while (MacroQualifiedTypeLoc TL = CurrTL.getAs<MacroQualifiedTypeLoc>()) {
TL.setExpansionLoc(
State.getExpansionLocForMacroQualifiedType(TL.getTypePtr()));
CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
}
while (AttributedTypeLoc TL = CurrTL.getAs<AttributedTypeLoc>()) {
fillAttributedTypeLoc(TL, State);
CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
}
+ while (BTFTagAttributedTypeLoc TL = CurrTL.getAs<BTFTagAttributedTypeLoc>())
+ CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
+
while (DependentAddressSpaceTypeLoc TL =
CurrTL.getAs<DependentAddressSpaceTypeLoc>()) {
fillDependentAddressSpaceTypeLoc(TL, D.getTypeObject(i).getAttrs());
CurrTL = TL.getPointeeTypeLoc().getUnqualifiedLoc();
}
if (MatrixTypeLoc TL = CurrTL.getAs<MatrixTypeLoc>())
fillMatrixTypeLoc(TL, D.getTypeObject(i).getAttrs());
// FIXME: Ordering here?
while (AdjustedTypeLoc TL = CurrTL.getAs<AdjustedTypeLoc>())
CurrTL = TL.getNextTypeLoc().getUnqualifiedLoc();
DeclaratorLocFiller(S.Context, State, D.getTypeObject(i)).Visit(CurrTL);
CurrTL = CurrTL.getNextTypeLoc().getUnqualifiedLoc();
}
// If we have different source information for the return type, use
// that. This really only applies to C++ conversion functions.
if (ReturnTypeInfo) {
TypeLoc TL = ReturnTypeInfo->getTypeLoc();
assert(TL.getFullDataSize() == CurrTL.getFullDataSize());
memcpy(CurrTL.getOpaqueData(), TL.getOpaqueData(), TL.getFullDataSize());
} else {
TypeSpecLocFiller(S, S.Context, State, D.getDeclSpec()).Visit(CurrTL);
}
return TInfo;
}
/// Create a LocInfoType to hold the given QualType and TypeSourceInfo.
ParsedType Sema::CreateParsedType(QualType T, TypeSourceInfo *TInfo) {
// FIXME: LocInfoTypes are "transient", only needed for passing to/from Parser
// and Sema during declaration parsing. Try deallocating/caching them when
// it's appropriate, instead of allocating them and keeping them around.
LocInfoType *LocT = (LocInfoType*)BumpAlloc.Allocate(sizeof(LocInfoType),
TypeAlignment);
new (LocT) LocInfoType(T, TInfo);
assert(LocT->getTypeClass() != T->getTypeClass() &&
"LocInfoType's TypeClass conflicts with an existing Type class");
return ParsedType::make(QualType(LocT, 0));
}
void LocInfoType::getAsStringInternal(std::string &Str,
const PrintingPolicy &Policy) const {
llvm_unreachable("LocInfoType leaked into the type system; an opaque TypeTy*"
" was used directly instead of getting the QualType through"
" GetTypeFromParser");
}
TypeResult Sema::ActOnTypeName(Scope *S, Declarator &D) {
// C99 6.7.6: Type names have no identifier. This is already validated by
// the parser.
assert(D.getIdentifier() == nullptr &&
"Type name should have no identifier!");
TypeSourceInfo *TInfo = GetTypeForDeclarator(D, S);
QualType T = TInfo->getType();
if (D.isInvalidType())
return true;
// Make sure there are no unused decl attributes on the declarator.
// We don't want to do this for ObjC parameters because we're going
// to apply them to the actual parameter declaration.
// Likewise, we don't want to do this for alias declarations, because
// we are actually going to build a declaration from this eventually.
if (D.getContext() != DeclaratorContext::ObjCParameter &&
D.getContext() != DeclaratorContext::AliasDecl &&
D.getContext() != DeclaratorContext::AliasTemplate)
checkUnusedDeclAttributes(D);
if (getLangOpts().CPlusPlus) {
// Check that there are no default arguments (C++ only).
CheckExtraCXXDefaultArguments(D);
}
return CreateParsedType(T, TInfo);
}
ParsedType Sema::ActOnObjCInstanceType(SourceLocation Loc) {
QualType T = Context.getObjCInstanceType();
TypeSourceInfo *TInfo = Context.getTrivialTypeSourceInfo(T, Loc);
return CreateParsedType(T, TInfo);
}
//===----------------------------------------------------------------------===//
// Type Attribute Processing
//===----------------------------------------------------------------------===//
/// Build an AddressSpace index from a constant expression and diagnose any
/// errors related to invalid address_space arguments. Returns true if the
/// AddressSpace index was successfully built.
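/// e.g. 'int __attribute__((address_space(1))) *p;' yields the LangAS value
/// corresponding to target address space 1.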
static bool BuildAddressSpaceIndex(Sema &S, LangAS &ASIdx,
const Expr *AddrSpace,
SourceLocation AttrLoc) {
if (!AddrSpace->isValueDependent()) {
Optional<llvm::APSInt> OptAddrSpace =
AddrSpace->getIntegerConstantExpr(S.Context);
if (!OptAddrSpace) {
S.Diag(AttrLoc, diag::err_attribute_argument_type)
<< "'address_space'" << AANT_ArgumentIntegerConstant
<< AddrSpace->getSourceRange();
return false;
}
llvm::APSInt &addrSpace = *OptAddrSpace;
// Bounds checking.
if (addrSpace.isSigned()) {
if (addrSpace.isNegative()) {
S.Diag(AttrLoc, diag::err_attribute_address_space_negative)
<< AddrSpace->getSourceRange();
return false;
}
addrSpace.setIsSigned(false);
}
llvm::APSInt max(addrSpace.getBitWidth());
max =
Qualifiers::MaxAddressSpace - (unsigned)LangAS::FirstTargetAddressSpace;
if (addrSpace > max) {
S.Diag(AttrLoc, diag::err_attribute_address_space_too_high)
<< (unsigned)max.getZExtValue() << AddrSpace->getSourceRange();
return false;
}
ASIdx =
getLangASFromTargetAS(static_cast<unsigned>(addrSpace.getZExtValue()));
return true;
}
// Default value for DependentAddressSpaceTypes
ASIdx = LangAS::Default;
return true;
}
/// BuildAddressSpaceAttr - Builds a DependentAddressSpaceType if the
/// expression is still value-dependent; otherwise it applies the appropriate
/// address space to the type. This function allows dependent template
/// parameters to be used in conjunction with the address_space attribute.
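///
/// For illustration (hypothetical user code, not from this file):
///   template <int AS>
///   void f(int __attribute__((address_space(AS))) *ptr);
/// Until 'AS' is known, the pointee is wrapped in a DependentAddressSpaceType.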
QualType Sema::BuildAddressSpaceAttr(QualType &T, LangAS ASIdx, Expr *AddrSpace,
SourceLocation AttrLoc) {
if (!AddrSpace->isValueDependent()) {
if (DiagnoseMultipleAddrSpaceAttributes(*this, T.getAddressSpace(), ASIdx,
AttrLoc))
return QualType();
return Context.getAddrSpaceQualType(T, ASIdx);
}
// A check with similar intent to verifying that a type does not already have
// an address space, but for dependent types: if the current type is already
// a DependentAddressSpaceType, it is already lined up to receive another
// address space, and we cannot apply multiple address spaces to a single
// pointer indirection.
if (T->getAs<DependentAddressSpaceType>()) {
Diag(AttrLoc, diag::err_attribute_address_multiple_qualifiers);
return QualType();
}
return Context.getDependentAddressSpaceType(T, AddrSpace, AttrLoc);
}
QualType Sema::BuildAddressSpaceAttr(QualType &T, Expr *AddrSpace,
SourceLocation AttrLoc) {
LangAS ASIdx;
if (!BuildAddressSpaceIndex(*this, ASIdx, AddrSpace, AttrLoc))
return QualType();
return BuildAddressSpaceAttr(T, ASIdx, AddrSpace, AttrLoc);
}
static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr,
TypeProcessingState &State) {
Sema &S = State.getSema();
// Check the number of attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
<< Attr << 1;
Attr.setInvalid();
return;
}
// Ensure the argument is a string.
auto *StrLiteral = dyn_cast<StringLiteral>(Attr.getArgAsExpr(0));
if (!StrLiteral) {
S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
<< Attr << AANT_ArgumentString;
Attr.setInvalid();
return;
}
ASTContext &Ctx = S.Context;
StringRef BTFTypeTag = StrLiteral->getString();
Type = State.getBTFTagAttributedType(
::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type);
}
/// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the
/// specified type. The attribute contains 1 argument, the id of the address
/// space for the type.
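///
/// For illustration (hypothetical user code, not from this file):
///   typedef int __attribute__((address_space(1))) as1_int; // numeric form
///   __global int *p; // OpenCL keyword form, handled by the else branch below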
static void HandleAddressSpaceTypeAttribute(QualType &Type,
const ParsedAttr &Attr,
TypeProcessingState &State) {
Sema &S = State.getSema();
// ISO/IEC TR 18037 S5.3 (amending C99 6.7.3): "A function type shall not be
// qualified by an address-space qualifier."
if (Type->isFunctionType()) {
S.Diag(Attr.getLoc(), diag::err_attribute_address_function_type);
Attr.setInvalid();
return;
}
LangAS ASIdx;
if (Attr.getKind() == ParsedAttr::AT_AddressSpace) {
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
Attr.setInvalid();
return;
}
Expr *ASArgExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
LangAS ASIdx;
if (!BuildAddressSpaceIndex(S, ASIdx, ASArgExpr, Attr.getLoc())) {
Attr.setInvalid();
return;
}
ASTContext &Ctx = S.Context;
auto *ASAttr =
::new (Ctx) AddressSpaceAttr(Ctx, Attr, static_cast<unsigned>(ASIdx));
// If the expression is not value-dependent (not templated), then we can
// apply the address space qualifiers just to the equivalent type.
// Otherwise, we make an AttributedType whose modified and equivalent
// types are the same, and wrap it in a DependentAddressSpaceType. When this
// dependent type is resolved, the qualifier is added to the equivalent type
// later.
QualType T;
if (!ASArgExpr->isValueDependent()) {
QualType EquivType =
S.BuildAddressSpaceAttr(Type, ASIdx, ASArgExpr, Attr.getLoc());
if (EquivType.isNull()) {
Attr.setInvalid();
return;
}
T = State.getAttributedType(ASAttr, Type, EquivType);
} else {
T = State.getAttributedType(ASAttr, Type, Type);
T = S.BuildAddressSpaceAttr(T, ASIdx, ASArgExpr, Attr.getLoc());
}
if (!T.isNull())
Type = T;
else
Attr.setInvalid();
} else {
// The keyword-based type attributes imply which address space to use.
ASIdx = S.getLangOpts().SYCLIsDevice ? Attr.asSYCLLangAS()
: Attr.asOpenCLLangAS();
if (ASIdx == LangAS::Default)
llvm_unreachable("Invalid address space");
if (DiagnoseMultipleAddrSpaceAttributes(S, Type.getAddressSpace(), ASIdx,
Attr.getLoc())) {
Attr.setInvalid();
return;
}
Type = S.Context.getAddrSpaceQualType(Type, ASIdx);
}
}
/// handleObjCOwnershipTypeAttr - Process an objc_ownership
/// attribute on the specified type.
///
/// Returns 'true' if the attribute was handled.
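///
/// For illustration (hypothetical user code, not from this file):
///   id __attribute__((objc_ownership(weak))) obj; // i.e. '__weak id obj;'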
static bool handleObjCOwnershipTypeAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type) {
bool NonObjCPointer = false;
if (!type->isDependentType() && !type->isUndeducedType()) {
if (const PointerType *ptr = type->getAs<PointerType>()) {
QualType pointee = ptr->getPointeeType();
if (pointee->isObjCRetainableType() || pointee->isPointerType())
return false;
// It is important not to lose the source info that there was an attribute
// applied to a non-ObjC pointer. We will create an attributed type, but
// its type will be the same as the original type.
NonObjCPointer = true;
} else if (!type->isObjCRetainableType()) {
return false;
}
// Don't accept an ownership attribute in the declspec if it would
// just be the return type of a block pointer.
if (state.isProcessingDeclSpec()) {
Declarator &D = state.getDeclarator();
if (maybeMovePastReturnType(D, D.getNumTypeObjects(),
/*onlyBlockPointers=*/true))
return false;
}
}
Sema &S = state.getSema();
SourceLocation AttrLoc = attr.getLoc();
if (AttrLoc.isMacroID())
AttrLoc =
S.getSourceManager().getImmediateExpansionRange(AttrLoc).getBegin();
if (!attr.isArgIdent(0)) {
S.Diag(AttrLoc, diag::err_attribute_argument_type) << attr
<< AANT_ArgumentString;
attr.setInvalid();
return true;
}
IdentifierInfo *II = attr.getArgAsIdent(0)->Ident;
Qualifiers::ObjCLifetime lifetime;
if (II->isStr("none"))
lifetime = Qualifiers::OCL_ExplicitNone;
else if (II->isStr("strong"))
lifetime = Qualifiers::OCL_Strong;
else if (II->isStr("weak"))
lifetime = Qualifiers::OCL_Weak;
else if (II->isStr("autoreleasing"))
lifetime = Qualifiers::OCL_Autoreleasing;
else {
S.Diag(AttrLoc, diag::warn_attribute_type_not_supported) << attr << II;
attr.setInvalid();
return true;
}
// Just ignore lifetime attributes other than __weak and __unsafe_unretained
// outside of ARC mode.
if (!S.getLangOpts().ObjCAutoRefCount &&
lifetime != Qualifiers::OCL_Weak &&
lifetime != Qualifiers::OCL_ExplicitNone) {
return true;
}
SplitQualType underlyingType = type.split();
// Check for redundant/conflicting ownership qualifiers.
if (Qualifiers::ObjCLifetime previousLifetime
= type.getQualifiers().getObjCLifetime()) {
// If it's written directly, that's an error.
if (S.Context.hasDirectOwnershipQualifier(type)) {
S.Diag(AttrLoc, diag::err_attr_objc_ownership_redundant)
<< type;
return true;
}
// Otherwise, if the qualifiers actually conflict, pull sugar off
// and remove the ObjCLifetime qualifiers.
if (previousLifetime != lifetime) {
// It's possible to have multiple local ObjCLifetime qualifiers. We
// can't stop after we reach a type that is directly qualified.
const Type *prevTy = nullptr;
while (!prevTy || prevTy != underlyingType.Ty) {
prevTy = underlyingType.Ty;
underlyingType = underlyingType.getSingleStepDesugaredType();
}
underlyingType.Quals.removeObjCLifetime();
}
}
underlyingType.Quals.addObjCLifetime(lifetime);
if (NonObjCPointer) {
StringRef name = attr.getAttrName()->getName();
switch (lifetime) {
case Qualifiers::OCL_None:
case Qualifiers::OCL_ExplicitNone:
break;
case Qualifiers::OCL_Strong: name = "__strong"; break;
case Qualifiers::OCL_Weak: name = "__weak"; break;
case Qualifiers::OCL_Autoreleasing: name = "__autoreleasing"; break;
}
S.Diag(AttrLoc, diag::warn_type_attribute_wrong_type) << name
<< TDS_ObjCObjOrBlock << type;
}
// Don't actually add the __unsafe_unretained qualifier in non-ARC files,
// because having both 'T' and '__unsafe_unretained T' exist in the type
// system causes unfortunate widespread consistency problems. (For example,
// they're not considered compatible types, and we mangle them identically
// as template arguments.) These problems are all individually fixable,
// but it's easier to just not add the qualifier and instead sniff it out
// in specific places using isObjCInertUnsafeUnretainedType().
//
// Doing this does mean we miss some trivial consistency checks that
// would've triggered in ARC, but that's better than trying to solve all
// the coexistence problems with __unsafe_unretained.
if (!S.getLangOpts().ObjCAutoRefCount &&
lifetime == Qualifiers::OCL_ExplicitNone) {
type = state.getAttributedType(
createSimpleAttr<ObjCInertUnsafeUnretainedAttr>(S.Context, attr),
type, type);
return true;
}
QualType origType = type;
if (!NonObjCPointer)
type = S.Context.getQualifiedType(underlyingType);
// If we have a valid source location for the attribute, use an
// AttributedType instead.
if (AttrLoc.isValid()) {
type = state.getAttributedType(::new (S.Context)
ObjCOwnershipAttr(S.Context, attr, II),
origType, type);
}
auto diagnoseOrDelay = [](Sema &S, SourceLocation loc,
unsigned diagnostic, QualType type) {
if (S.DelayedDiagnostics.shouldDelayDiagnostics()) {
S.DelayedDiagnostics.add(
sema::DelayedDiagnostic::makeForbiddenType(
S.getSourceManager().getExpansionLoc(loc),
diagnostic, type, /*ignored*/ 0));
} else {
S.Diag(loc, diagnostic);
}
};
// Sometimes, __weak isn't allowed.
if (lifetime == Qualifiers::OCL_Weak &&
!S.getLangOpts().ObjCWeak && !NonObjCPointer) {
// Use a specialized diagnostic if the runtime just doesn't support them.
unsigned diagnostic =
(S.getLangOpts().ObjCWeakRuntime ? diag::err_arc_weak_disabled
: diag::err_arc_weak_no_runtime);
// In any case, delay the diagnostic until we know what we're parsing.
diagnoseOrDelay(S, AttrLoc, diagnostic, type);
attr.setInvalid();
return true;
}
// Forbid __weak for class objects marked as
// objc_arc_weak_reference_unavailable
if (lifetime == Qualifiers::OCL_Weak) {
if (const ObjCObjectPointerType *ObjT =
type->getAs<ObjCObjectPointerType>()) {
if (ObjCInterfaceDecl *Class = ObjT->getInterfaceDecl()) {
if (Class->isArcWeakrefUnavailable()) {
S.Diag(AttrLoc, diag::err_arc_unsupported_weak_class);
S.Diag(ObjT->getInterfaceDecl()->getLocation(),
diag::note_class_declared);
}
}
}
}
return true;
}
/// handleObjCGCTypeAttr - Process the __attribute__((objc_gc)) type
/// attribute on the specified type. Returns true to indicate that
/// the attribute was handled, false to indicate that the type does
/// not permit the attribute.
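///
/// For illustration (hypothetical user code, not from this file):
///   id __attribute__((objc_gc(weak))) obj; // GC-mode weak reference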
static bool handleObjCGCTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type) {
Sema &S = state.getSema();
// Delay if this isn't some kind of pointer.
if (!type->isPointerType() &&
!type->isObjCObjectPointerType() &&
!type->isBlockPointerType())
return false;
if (type.getObjCGCAttr() != Qualifiers::GCNone) {
S.Diag(attr.getLoc(), diag::err_attribute_multiple_objc_gc);
attr.setInvalid();
return true;
}
// Check the attribute arguments.
if (!attr.isArgIdent(0)) {
S.Diag(attr.getLoc(), diag::err_attribute_argument_type)
<< attr << AANT_ArgumentString;
attr.setInvalid();
return true;
}
Qualifiers::GC GCAttr;
if (attr.getNumArgs() > 1) {
S.Diag(attr.getLoc(), diag::err_attribute_wrong_number_arguments) << attr
<< 1;
attr.setInvalid();
return true;
}
IdentifierInfo *II = attr.getArgAsIdent(0)->Ident;
if (II->isStr("weak"))
GCAttr = Qualifiers::Weak;
else if (II->isStr("strong"))
GCAttr = Qualifiers::Strong;
else {
S.Diag(attr.getLoc(), diag::warn_attribute_type_not_supported)
<< attr << II;
attr.setInvalid();
return true;
}
QualType origType = type;
type = S.Context.getObjCGCQualType(origType, GCAttr);
// Make an attributed type to preserve the source information.
if (attr.getLoc().isValid())
type = state.getAttributedType(
::new (S.Context) ObjCGCAttr(S.Context, attr, II), origType, type);
return true;
}
namespace {
/// A helper class to unwrap a type down to a function for the
/// purposes of applying attributes there.
///
/// Use:
/// FunctionTypeUnwrapper unwrapped(SemaRef, T);
/// if (unwrapped.isFunctionType()) {
/// const FunctionType *fn = unwrapped.get();
/// // change fn somehow
/// T = unwrapped.wrap(fn);
/// }
struct FunctionTypeUnwrapper {
enum WrapKind {
Desugar,
Attributed,
Parens,
Array,
Pointer,
BlockPointer,
Reference,
MemberPointer,
MacroQualified,
};
QualType Original;
const FunctionType *Fn;
SmallVector<unsigned char /*WrapKind*/, 8> Stack;
FunctionTypeUnwrapper(Sema &S, QualType T) : Original(T) {
while (true) {
const Type *Ty = T.getTypePtr();
if (isa<FunctionType>(Ty)) {
Fn = cast<FunctionType>(Ty);
return;
} else if (isa<ParenType>(Ty)) {
T = cast<ParenType>(Ty)->getInnerType();
Stack.push_back(Parens);
} else if (isa<ConstantArrayType>(Ty) || isa<VariableArrayType>(Ty) ||
isa<IncompleteArrayType>(Ty)) {
T = cast<ArrayType>(Ty)->getElementType();
Stack.push_back(Array);
} else if (isa<PointerType>(Ty)) {
T = cast<PointerType>(Ty)->getPointeeType();
Stack.push_back(Pointer);
} else if (isa<BlockPointerType>(Ty)) {
T = cast<BlockPointerType>(Ty)->getPointeeType();
Stack.push_back(BlockPointer);
} else if (isa<MemberPointerType>(Ty)) {
T = cast<MemberPointerType>(Ty)->getPointeeType();
Stack.push_back(MemberPointer);
} else if (isa<ReferenceType>(Ty)) {
T = cast<ReferenceType>(Ty)->getPointeeType();
Stack.push_back(Reference);
} else if (isa<AttributedType>(Ty)) {
T = cast<AttributedType>(Ty)->getEquivalentType();
Stack.push_back(Attributed);
} else if (isa<MacroQualifiedType>(Ty)) {
T = cast<MacroQualifiedType>(Ty)->getUnderlyingType();
Stack.push_back(MacroQualified);
} else {
const Type *DTy = Ty->getUnqualifiedDesugaredType();
if (Ty == DTy) {
Fn = nullptr;
return;
}
T = QualType(DTy, 0);
Stack.push_back(Desugar);
}
}
}
bool isFunctionType() const { return (Fn != nullptr); }
const FunctionType *get() const { return Fn; }
QualType wrap(Sema &S, const FunctionType *New) {
// If T wasn't modified from the unwrapped type, do nothing.
if (New == get()) return Original;
Fn = New;
return wrap(S.Context, Original, 0);
}
private:
QualType wrap(ASTContext &C, QualType Old, unsigned I) {
if (I == Stack.size())
return C.getQualifiedType(Fn, Old.getQualifiers());
// Build up the inner type, applying the qualifiers from the old
// type to the new type.
SplitQualType SplitOld = Old.split();
// As a special case, tail-recurse if there are no qualifiers.
if (SplitOld.Quals.empty())
return wrap(C, SplitOld.Ty, I);
return C.getQualifiedType(wrap(C, SplitOld.Ty, I), SplitOld.Quals);
}
QualType wrap(ASTContext &C, const Type *Old, unsigned I) {
if (I == Stack.size()) return QualType(Fn, 0);
switch (static_cast<WrapKind>(Stack[I++])) {
case Desugar:
// This is the point at which we potentially lose source
// information.
return wrap(C, Old->getUnqualifiedDesugaredType(), I);
case Attributed:
return wrap(C, cast<AttributedType>(Old)->getEquivalentType(), I);
case Parens: {
QualType New = wrap(C, cast<ParenType>(Old)->getInnerType(), I);
return C.getParenType(New);
}
case MacroQualified:
return wrap(C, cast<MacroQualifiedType>(Old)->getUnderlyingType(), I);
case Array: {
if (const auto *CAT = dyn_cast<ConstantArrayType>(Old)) {
QualType New = wrap(C, CAT->getElementType(), I);
return C.getConstantArrayType(New, CAT->getSize(), CAT->getSizeExpr(),
CAT->getSizeModifier(),
CAT->getIndexTypeCVRQualifiers());
}
if (const auto *VAT = dyn_cast<VariableArrayType>(Old)) {
QualType New = wrap(C, VAT->getElementType(), I);
return C.getVariableArrayType(
New, VAT->getSizeExpr(), VAT->getSizeModifier(),
VAT->getIndexTypeCVRQualifiers(), VAT->getBracketsRange());
}
const auto *IAT = cast<IncompleteArrayType>(Old);
QualType New = wrap(C, IAT->getElementType(), I);
return C.getIncompleteArrayType(New, IAT->getSizeModifier(),
IAT->getIndexTypeCVRQualifiers());
}
case Pointer: {
QualType New = wrap(C, cast<PointerType>(Old)->getPointeeType(), I);
return C.getPointerType(New);
}
case BlockPointer: {
QualType New = wrap(C, cast<BlockPointerType>(Old)->getPointeeType(),I);
return C.getBlockPointerType(New);
}
case MemberPointer: {
const MemberPointerType *OldMPT = cast<MemberPointerType>(Old);
QualType New = wrap(C, OldMPT->getPointeeType(), I);
return C.getMemberPointerType(New, OldMPT->getClass());
}
case Reference: {
const ReferenceType *OldRef = cast<ReferenceType>(Old);
QualType New = wrap(C, OldRef->getPointeeType(), I);
if (isa<LValueReferenceType>(OldRef))
return C.getLValueReferenceType(New, OldRef->isSpelledAsLValue());
else
return C.getRValueReferenceType(New);
}
}
llvm_unreachable("unknown wrapping kind");
}
};
} // end anonymous namespace
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &State,
ParsedAttr &PAttr, QualType &Type) {
Sema &S = State.getSema();
Attr *A;
switch (PAttr.getKind()) {
default: llvm_unreachable("Unknown attribute kind");
case ParsedAttr::AT_Ptr32:
A = createSimpleAttr<Ptr32Attr>(S.Context, PAttr);
break;
case ParsedAttr::AT_Ptr64:
A = createSimpleAttr<Ptr64Attr>(S.Context, PAttr);
break;
case ParsedAttr::AT_SPtr:
A = createSimpleAttr<SPtrAttr>(S.Context, PAttr);
break;
case ParsedAttr::AT_UPtr:
A = createSimpleAttr<UPtrAttr>(S.Context, PAttr);
break;
}
std::bitset<attr::LastAttr> Attrs;
attr::Kind NewAttrKind = A->getKind();
QualType Desugared = Type;
const AttributedType *AT = dyn_cast<AttributedType>(Type);
while (AT) {
Attrs[AT->getAttrKind()] = true;
Desugared = AT->getModifiedType();
AT = dyn_cast<AttributedType>(Desugared);
}
// You cannot specify duplicate type attributes, so if the attribute has
// already been applied, flag it.
if (Attrs[NewAttrKind]) {
S.Diag(PAttr.getLoc(), diag::warn_duplicate_attribute_exact) << PAttr;
return true;
}
Attrs[NewAttrKind] = true;
// You cannot have both __sptr and __uptr on the same type, nor can you
// have __ptr32 and __ptr64.
if (Attrs[attr::Ptr32] && Attrs[attr::Ptr64]) {
S.Diag(PAttr.getLoc(), diag::err_attributes_are_not_compatible)
<< "'__ptr32'"
<< "'__ptr64'";
return true;
} else if (Attrs[attr::SPtr] && Attrs[attr::UPtr]) {
S.Diag(PAttr.getLoc(), diag::err_attributes_are_not_compatible)
<< "'__sptr'"
<< "'__uptr'";
return true;
}
// Pointer type qualifiers can only operate on pointer types, but not
// pointer-to-member types.
//
// FIXME: Should we really be disallowing this attribute if there is any
// type sugar between it and the pointer (other than attributes)? Eg, this
// disallows the attribute on a parenthesized pointer.
// And if so, should we really allow *any* type attribute?
if (!isa<PointerType>(Desugared)) {
if (Type->isMemberPointerType())
S.Diag(PAttr.getLoc(), diag::err_attribute_no_member_pointers) << PAttr;
else
S.Diag(PAttr.getLoc(), diag::err_attribute_pointers_only) << PAttr << 0;
return true;
}
// Add address space to type based on its attributes.
LangAS ASIdx = LangAS::Default;
uint64_t PtrWidth = S.Context.getTargetInfo().getPointerWidth(0);
if (PtrWidth == 32) {
if (Attrs[attr::Ptr64])
ASIdx = LangAS::ptr64;
else if (Attrs[attr::UPtr])
ASIdx = LangAS::ptr32_uptr;
} else if (PtrWidth == 64 && Attrs[attr::Ptr32]) {
if (Attrs[attr::UPtr])
ASIdx = LangAS::ptr32_uptr;
else
ASIdx = LangAS::ptr32_sptr;
}
QualType Pointee = Type->getPointeeType();
if (ASIdx != LangAS::Default)
Pointee = S.Context.getAddrSpaceQualType(
S.Context.removeAddrSpaceQualType(Pointee), ASIdx);
Type = State.getAttributedType(A, Type, S.Context.getPointerType(Pointee));
return false;
}
/// Map a nullability attribute kind to a nullability kind.
static NullabilityKind mapNullabilityAttrKind(ParsedAttr::Kind kind) {
switch (kind) {
case ParsedAttr::AT_TypeNonNull:
return NullabilityKind::NonNull;
case ParsedAttr::AT_TypeNullable:
return NullabilityKind::Nullable;
case ParsedAttr::AT_TypeNullableResult:
return NullabilityKind::NullableResult;
case ParsedAttr::AT_TypeNullUnspecified:
return NullabilityKind::Unspecified;
default:
llvm_unreachable("not a nullability attribute kind");
}
}
/// Applies a nullability type specifier to the given type, if possible.
///
/// \param state The type processing state.
///
/// \param type The type to which the nullability specifier will be
/// added. On success, this type will be updated appropriately.
///
/// \param attr The attribute as written on the type.
///
/// \param allowOnArrayType Whether to accept nullability specifiers on an
/// array type (e.g., because it will decay to a pointer).
///
/// \returns true if a problem has been diagnosed, false on success.
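///
/// For illustration (hypothetical user code, not from this file):
///   int * _Nonnull p;           // OK: qualifies the pointer type
///   int * _Nonnull _Nullable q; // error: conflicting nullability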
static bool checkNullabilityTypeSpecifier(TypeProcessingState &state,
QualType &type,
ParsedAttr &attr,
bool allowOnArrayType) {
Sema &S = state.getSema();
NullabilityKind nullability = mapNullabilityAttrKind(attr.getKind());
SourceLocation nullabilityLoc = attr.getLoc();
bool isContextSensitive = attr.isContextSensitiveKeywordAttribute();
recordNullabilitySeen(S, nullabilityLoc);
// Check for existing nullability attributes on the type.
QualType desugared = type;
while (auto attributed = dyn_cast<AttributedType>(desugared.getTypePtr())) {
// Check whether there is already a nullability attribute at this level.
if (auto existingNullability = attributed->getImmediateNullability()) {
// Duplicated nullability.
if (nullability == *existingNullability) {
S.Diag(nullabilityLoc, diag::warn_nullability_duplicate)
<< DiagNullabilityKind(nullability, isContextSensitive)
<< FixItHint::CreateRemoval(nullabilityLoc);
break;
}
// Conflicting nullability.
S.Diag(nullabilityLoc, diag::err_nullability_conflicting)
<< DiagNullabilityKind(nullability, isContextSensitive)
<< DiagNullabilityKind(*existingNullability, false);
return true;
}
desugared = attributed->getModifiedType();
}
// If there is already a different nullability specifier, complain.
// This (unlike the code above) looks through typedefs that might
// have nullability specifiers on them, which means we cannot
// provide a useful Fix-It.
if (auto existingNullability = desugared->getNullability(S.Context)) {
if (nullability != *existingNullability) {
S.Diag(nullabilityLoc, diag::err_nullability_conflicting)
<< DiagNullabilityKind(nullability, isContextSensitive)
<< DiagNullabilityKind(*existingNullability, false);
// Try to find the typedef with the existing nullability specifier.
if (auto typedefType = desugared->getAs<TypedefType>()) {
TypedefNameDecl *typedefDecl = typedefType->getDecl();
QualType underlyingType = typedefDecl->getUnderlyingType();
if (auto typedefNullability
= AttributedType::stripOuterNullability(underlyingType)) {
if (*typedefNullability == *existingNullability) {
S.Diag(typedefDecl->getLocation(), diag::note_nullability_here)
<< DiagNullabilityKind(*existingNullability, false);
}
}
}
return true;
}
}
// If this definitely isn't a pointer type, reject the specifier.
if (!desugared->canHaveNullability() &&
!(allowOnArrayType && desugared->isArrayType())) {
S.Diag(nullabilityLoc, diag::err_nullability_nonpointer)
<< DiagNullabilityKind(nullability, isContextSensitive) << type;
return true;
}
// For the context-sensitive keywords/Objective-C property
// attributes, require that the type be a single-level pointer.
if (isContextSensitive) {
// Make sure that the pointee isn't itself a pointer type.
const Type *pointeeType = nullptr;
if (desugared->isArrayType())
pointeeType = desugared->getArrayElementTypeNoTypeQual();
else if (desugared->isAnyPointerType())
pointeeType = desugared->getPointeeType().getTypePtr();
if (pointeeType && (pointeeType->isAnyPointerType() ||
pointeeType->isObjCObjectPointerType() ||
pointeeType->isMemberPointerType())) {
S.Diag(nullabilityLoc, diag::err_nullability_cs_multilevel)
<< DiagNullabilityKind(nullability, true)
<< type;
S.Diag(nullabilityLoc, diag::note_nullability_type_specifier)
<< DiagNullabilityKind(nullability, false)
<< type
<< FixItHint::CreateReplacement(nullabilityLoc,
getNullabilitySpelling(nullability));
return true;
}
}
// Form the attributed type.
type = state.getAttributedType(
createNullabilityAttr(S.Context, attr, nullability), type, type);
return false;
}
/// Check the application of the Objective-C '__kindof' qualifier to
/// the given type.
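///
/// For illustration (hypothetical user code, not from this file):
///   __kindof NSView *view; // NSView or any subclass thereof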
static bool checkObjCKindOfType(TypeProcessingState &state, QualType &type,
ParsedAttr &attr) {
Sema &S = state.getSema();
if (isa<ObjCTypeParamType>(type)) {
// Build the attributed type to record where __kindof occurred.
type = state.getAttributedType(
createSimpleAttr<ObjCKindOfAttr>(S.Context, attr), type, type);
return false;
}
// Find out if it's an Objective-C object or object pointer type.
const ObjCObjectPointerType *ptrType = type->getAs<ObjCObjectPointerType>();
const ObjCObjectType *objType = ptrType ? ptrType->getObjectType()
: type->getAs<ObjCObjectType>();
// If not, we can't apply __kindof.
if (!objType) {
// FIXME: Handle dependent types that aren't yet object types.
S.Diag(attr.getLoc(), diag::err_objc_kindof_nonobject)
<< type;
return true;
}
// Rebuild the "equivalent" type, which pushes __kindof down into
// the object type.
// There is no need to apply kindof on an unqualified id type.
QualType equivType = S.Context.getObjCObjectType(
objType->getBaseType(), objType->getTypeArgsAsWritten(),
objType->getProtocols(),
/*isKindOf=*/objType->isObjCUnqualifiedId() ? false : true);
// If we started with an object pointer type, rebuild it.
if (ptrType) {
equivType = S.Context.getObjCObjectPointerType(equivType);
if (auto nullability = type->getNullability(S.Context)) {
// We create a nullability attribute from the __kindof attribute.
// Make sure that will make sense.
assert(attr.getAttributeSpellingListIndex() == 0 &&
"multiple spellings for __kindof?");
Attr *A = createNullabilityAttr(S.Context, attr, *nullability);
A->setImplicit(true);
equivType = state.getAttributedType(A, equivType, equivType);
}
}
// Build the attributed type to record where __kindof occurred.
type = state.getAttributedType(
createSimpleAttr<ObjCKindOfAttr>(S.Context, attr), type, equivType);
return false;
}
/// Distribute a nullability type attribute that cannot be applied to
/// the type specifier to a pointer, block pointer, or member pointer
/// declarator, complaining if necessary.
///
/// \returns true if the nullability annotation was distributed, false
/// otherwise.
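///
/// For illustration (hypothetical user code, not from this file): given
///   _Nonnull int *ptr;
/// the specifier cannot apply to 'int', so it is moved onto the pointer
/// declarator, as if the user had written 'int * _Nonnull ptr;' (with a
/// warning and a fix-it).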
static bool distributeNullabilityTypeAttr(TypeProcessingState &state,
QualType type, ParsedAttr &attr) {
Declarator &declarator = state.getDeclarator();
/// Attempt to move the attribute to the specified chunk.
auto moveToChunk = [&](DeclaratorChunk &chunk, bool inFunction) -> bool {
// If there is already a nullability attribute there, don't add
// one.
if (hasNullabilityAttr(chunk.getAttrs()))
return false;
// Complain about the nullability qualifier being in the wrong
// place.
enum {
PK_Pointer,
PK_BlockPointer,
PK_MemberPointer,
PK_FunctionPointer,
PK_MemberFunctionPointer,
} pointerKind
= chunk.Kind == DeclaratorChunk::Pointer ? (inFunction ? PK_FunctionPointer
: PK_Pointer)
: chunk.Kind == DeclaratorChunk::BlockPointer ? PK_BlockPointer
: inFunction? PK_MemberFunctionPointer : PK_MemberPointer;
auto diag = state.getSema().Diag(attr.getLoc(),
diag::warn_nullability_declspec)
<< DiagNullabilityKind(mapNullabilityAttrKind(attr.getKind()),
attr.isContextSensitiveKeywordAttribute())
<< type
<< static_cast<unsigned>(pointerKind);
// FIXME: MemberPointer chunks don't carry the location of the *.
if (chunk.Kind != DeclaratorChunk::MemberPointer) {
diag << FixItHint::CreateRemoval(attr.getLoc())
<< FixItHint::CreateInsertion(
state.getSema().getPreprocessor().getLocForEndOfToken(
chunk.Loc),
" " + attr.getAttrName()->getName().str() + " ");
}
moveAttrFromListToList(attr, state.getCurrentAttributes(),
chunk.getAttrs());
return true;
};
// Move it to the outermost pointer, member pointer, or block
// pointer declarator.
for (unsigned i = state.getCurrentChunkIndex(); i != 0; --i) {
DeclaratorChunk &chunk = declarator.getTypeObject(i-1);
switch (chunk.Kind) {
case DeclaratorChunk::Pointer:
case DeclaratorChunk::BlockPointer:
case DeclaratorChunk::MemberPointer:
return moveToChunk(chunk, false);
case DeclaratorChunk::Paren:
case DeclaratorChunk::Array:
continue;
case DeclaratorChunk::Function:
// Try to move past the return type to a function/block/member
// function pointer.
if (DeclaratorChunk *dest = maybeMovePastReturnType(
declarator, i,
/*onlyBlockPointers=*/false)) {
return moveToChunk(*dest, true);
}
return false;
// Don't walk through these.
case DeclaratorChunk::Reference:
case DeclaratorChunk::Pipe:
return false;
}
}
return false;
}
static Attr *getCCTypeAttr(ASTContext &Ctx, ParsedAttr &Attr) {
assert(!Attr.isInvalid());
switch (Attr.getKind()) {
default:
llvm_unreachable("not a calling convention attribute");
case ParsedAttr::AT_CDecl:
return createSimpleAttr<CDeclAttr>(Ctx, Attr);
case ParsedAttr::AT_FastCall:
return createSimpleAttr<FastCallAttr>(Ctx, Attr);
case ParsedAttr::AT_StdCall:
return createSimpleAttr<StdCallAttr>(Ctx, Attr);
case ParsedAttr::AT_ThisCall:
return createSimpleAttr<ThisCallAttr>(Ctx, Attr);
case ParsedAttr::AT_RegCall:
return createSimpleAttr<RegCallAttr>(Ctx, Attr);
case ParsedAttr::AT_Pascal:
return createSimpleAttr<PascalAttr>(Ctx, Attr);
case ParsedAttr::AT_SwiftCall:
return createSimpleAttr<SwiftCallAttr>(Ctx, Attr);
case ParsedAttr::AT_SwiftAsyncCall:
return createSimpleAttr<SwiftAsyncCallAttr>(Ctx, Attr);
case ParsedAttr::AT_VectorCall:
return createSimpleAttr<VectorCallAttr>(Ctx, Attr);
case ParsedAttr::AT_AArch64VectorPcs:
return createSimpleAttr<AArch64VectorPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_AArch64SVEPcs:
return createSimpleAttr<AArch64SVEPcsAttr>(Ctx, Attr);
case ParsedAttr::AT_AMDGPUKernelCall:
return createSimpleAttr<AMDGPUKernelCallAttr>(Ctx, Attr);
case ParsedAttr::AT_Pcs: {
// The attribute may have had a fixit applied where we treated an
// identifier as a string literal. The contents of the string are valid,
// but the form may not be.
StringRef Str;
if (Attr.isArgExpr(0))
Str = cast<StringLiteral>(Attr.getArgAsExpr(0))->getString();
else
Str = Attr.getArgAsIdent(0)->Ident->getName();
PcsAttr::PCSType Type;
if (!PcsAttr::ConvertStrToPCSType(Str, Type))
llvm_unreachable("already validated the attribute");
return ::new (Ctx) PcsAttr(Ctx, Attr, Type);
}
case ParsedAttr::AT_IntelOclBicc:
return createSimpleAttr<IntelOclBiccAttr>(Ctx, Attr);
case ParsedAttr::AT_MSABI:
return createSimpleAttr<MSABIAttr>(Ctx, Attr);
case ParsedAttr::AT_SysVABI:
return createSimpleAttr<SysVABIAttr>(Ctx, Attr);
case ParsedAttr::AT_PreserveMost:
return createSimpleAttr<PreserveMostAttr>(Ctx, Attr);
case ParsedAttr::AT_PreserveAll:
return createSimpleAttr<PreserveAllAttr>(Ctx, Attr);
}
llvm_unreachable("unexpected attribute kind!");
}
/// Process an individual function attribute. Returns true to
/// indicate that the attribute was handled, false if it wasn't.
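///
/// For illustration (hypothetical user code, not from this file):
///   void (__attribute__((stdcall)) *fp)(void);        // calling convention
///   void (__attribute__((regparm(2))) *gp)(int, int); // ExtInfo regparm bit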
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
QualType &type) {
Sema &S = state.getSema();
FunctionTypeUnwrapper unwrapped(S, type);
if (attr.getKind() == ParsedAttr::AT_NoReturn) {
if (S.CheckAttrNoArgs(attr))
return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Otherwise we can process right away.
FunctionType::ExtInfo EI = unwrapped.get()->getExtInfo().withNoReturn(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_CmseNSCall) {
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Ignore if we don't have CMSE enabled.
if (!S.getLangOpts().Cmse) {
S.Diag(attr.getLoc(), diag::warn_attribute_ignored) << attr;
attr.setInvalid();
return true;
}
// Otherwise we can process right away.
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withCmseNSCall(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
// ns_returns_retained is not always a type attribute, but if we got
// here, we're treating it as one right now.
if (attr.getKind() == ParsedAttr::AT_NSReturnsRetained) {
if (attr.getNumArgs()) return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Check whether the return type is reasonable.
if (S.checkNSReturnsRetainedReturnType(attr.getLoc(),
unwrapped.get()->getReturnType()))
return true;
// Only actually change the underlying type in ARC builds.
QualType origType = type;
if (state.getSema().getLangOpts().ObjCAutoRefCount) {
FunctionType::ExtInfo EI
= unwrapped.get()->getExtInfo().withProducesResult(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
}
type = state.getAttributedType(
createSimpleAttr<NSReturnsRetainedAttr>(S.Context, attr),
origType, type);
return true;
}
if (attr.getKind() == ParsedAttr::AT_AnyX86NoCallerSavedRegisters) {
if (S.CheckAttrTarget(attr) || S.CheckAttrNoArgs(attr))
return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withNoCallerSavedRegs(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_AnyX86NoCfCheck) {
if (!S.getLangOpts().CFProtectionBranch) {
S.Diag(attr.getLoc(), diag::warn_nocf_check_attribute_ignored);
attr.setInvalid();
return true;
}
if (S.CheckAttrTarget(attr) || S.CheckAttrNoArgs(attr))
return true;
// If this is not a function type, a warning will be emitted by the
// subject check.
if (!unwrapped.isFunctionType())
return true;
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withNoCfCheck(true);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_Regparm) {
unsigned value;
if (S.CheckRegparmAttr(attr, value))
return true;
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
// Diagnose regparm with fastcall.
const FunctionType *fn = unwrapped.get();
CallingConv CC = fn->getCallConv();
if (CC == CC_X86FastCall) {
S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible)
<< FunctionType::getNameForCallConv(CC)
<< "regparm";
attr.setInvalid();
return true;
}
FunctionType::ExtInfo EI =
unwrapped.get()->getExtInfo().withRegParm(value);
type = unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
return true;
}
if (attr.getKind() == ParsedAttr::AT_NoThrow) {
// Delay if this is not a function type.
if (!unwrapped.isFunctionType())
return false;
if (S.CheckAttrNoArgs(attr)) {
attr.setInvalid();
return true;
}
// Otherwise we can process right away.
auto *Proto = unwrapped.get()->castAs<FunctionProtoType>();
// MSVC ignores nothrow if it is in conflict with an explicit exception
// specification.
if (Proto->hasExceptionSpec()) {
switch (Proto->getExceptionSpecType()) {
case EST_None:
llvm_unreachable("This doesn't have an exception spec!");
case EST_DynamicNone:
case EST_BasicNoexcept:
case EST_NoexceptTrue:
case EST_NoThrow:
// Exception spec doesn't conflict with nothrow, so don't warn.
LLVM_FALLTHROUGH;
case EST_Unparsed:
case EST_Uninstantiated:
case EST_DependentNoexcept:
case EST_Unevaluated:
// We don't have enough information to properly determine if there is a
// conflict, so suppress the warning.
break;
case EST_Dynamic:
case EST_MSAny:
case EST_NoexceptFalse:
S.Diag(attr.getLoc(), diag::warn_nothrow_attribute_ignored);
break;
}
return true;
}
type = unwrapped.wrap(
S, S.Context
.getFunctionTypeWithExceptionSpec(
QualType{Proto, 0},
FunctionProtoType::ExceptionSpecInfo{EST_NoThrow})
->getAs<FunctionType>());
return true;
}
// Delay if the type didn't work out to a function.
if (!unwrapped.isFunctionType()) return false;
// Otherwise, a calling convention.
CallingConv CC;
if (S.CheckCallingConvAttr(attr, CC))
return true;
const FunctionType *fn = unwrapped.get();
CallingConv CCOld = fn->getCallConv();
Attr *CCAttr = getCCTypeAttr(S.Context, attr);
if (CCOld != CC) {
// Error out when there's already an attribute on the type
// and the CCs don't match.
if (S.getCallingConvAttributedType(type)) {
S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible)
<< FunctionType::getNameForCallConv(CC)
<< FunctionType::getNameForCallConv(CCOld);
attr.setInvalid();
return true;
}
}
// Diagnose use of variadic functions with calling conventions that
// don't support them (e.g. because they're callee-cleanup).
// We delay warning about this on unprototyped function declarations
// until after redeclaration checking, just in case we pick up a
// prototype that way. And apparently we also "delay" warning about
// unprototyped function types in general, despite not necessarily having
// much ability to diagnose it later.
if (!supportsVariadicCall(CC)) {
const FunctionProtoType *FnP = dyn_cast<FunctionProtoType>(fn);
if (FnP && FnP->isVariadic()) {
// stdcall and fastcall are ignored with a warning for GCC and MS
// compatibility.
if (CC == CC_X86StdCall || CC == CC_X86FastCall)
return S.Diag(attr.getLoc(), diag::warn_cconv_unsupported)
<< FunctionType::getNameForCallConv(CC)
<< (int)Sema::CallingConventionIgnoredReason::VariadicFunction;
attr.setInvalid();
return S.Diag(attr.getLoc(), diag::err_cconv_varargs)
<< FunctionType::getNameForCallConv(CC);
}
}
// Also diagnose fastcall with regparm.
if (CC == CC_X86FastCall && fn->getHasRegParm()) {
S.Diag(attr.getLoc(), diag::err_attributes_are_not_compatible)
<< "regparm" << FunctionType::getNameForCallConv(CC_X86FastCall);
attr.setInvalid();
return true;
}
// Modify the CC from the wrapped function type, wrap it all back, and then
// wrap the whole thing in an AttributedType as written. The modified type
// might have a different CC if we ignored the attribute.
QualType Equivalent;
if (CCOld == CC) {
Equivalent = type;
} else {
auto EI = unwrapped.get()->getExtInfo().withCallingConv(CC);
Equivalent =
unwrapped.wrap(S, S.Context.adjustFunctionType(unwrapped.get(), EI));
}
type = state.getAttributedType(CCAttr, type, Equivalent);
return true;
}
bool Sema::hasExplicitCallingConv(QualType T) {
const AttributedType *AT;
// Stop if we'd be stripping off a typedef sugar node to reach the
// AttributedType.
while ((AT = T->getAs<AttributedType>()) &&
AT->getAs<TypedefType>() == T->getAs<TypedefType>()) {
if (AT->isCallingConv())
return true;
T = AT->getModifiedType();
}
return false;
}
void Sema::adjustMemberFunctionCC(QualType &T, bool IsStatic, bool IsCtorOrDtor,
SourceLocation Loc) {
FunctionTypeUnwrapper Unwrapped(*this, T);
const FunctionType *FT = Unwrapped.get();
bool IsVariadic = (isa<FunctionProtoType>(FT) &&
cast<FunctionProtoType>(FT)->isVariadic());
CallingConv CurCC = FT->getCallConv();
CallingConv ToCC = Context.getDefaultCallingConvention(IsVariadic, !IsStatic);
if (CurCC == ToCC)
return;
// MS compiler ignores explicit calling convention attributes on structors. We
// should do the same.
if (Context.getTargetInfo().getCXXABI().isMicrosoft() && IsCtorOrDtor) {
// Issue a warning on ignored calling convention -- except for __stdcall.
// Again, this is what the MS compiler does.
if (CurCC != CC_X86StdCall)
Diag(Loc, diag::warn_cconv_unsupported)
<< FunctionType::getNameForCallConv(CurCC)
<< (int)Sema::CallingConventionIgnoredReason::ConstructorDestructor;
// Default adjustment.
} else {
// Only adjust types with the default convention. For example, on Windows
// we should adjust a __cdecl type to __thiscall for instance methods, and a
// __thiscall type to __cdecl for static methods.
CallingConv DefaultCC =
Context.getDefaultCallingConvention(IsVariadic, IsStatic);
if (CurCC != DefaultCC || DefaultCC == ToCC)
return;
if (hasExplicitCallingConv(T))
return;
}
FT = Context.adjustFunctionType(FT, FT->getExtInfo().withCallingConv(ToCC));
QualType Wrapped = Unwrapped.wrap(*this, FT);
T = Context.getAdjustedType(T, Wrapped);
}
/// HandleVectorSizeAttr - this attribute is only applicable to integral
/// and float scalars, although arrays, pointers, and function return values are
/// allowed in conjunction with this construct. Aggregates with this attribute
/// are invalid, even if they are of the same size as a corresponding scalar.
/// The raw attribute should contain precisely 1 argument, the vector size for
/// the variable, measured in bytes. If curType and rawAttr are well formed,
/// this routine will return a new vector type.
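///
/// For illustration (hypothetical user code, not from this file):
///   typedef int v4si __attribute__((vector_size(16))); // 16 bytes = 4 x i32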
static void HandleVectorSizeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
Attr.setInvalid();
return;
}
Expr *SizeExpr = Attr.getArgAsExpr(0);
QualType T = S.BuildVectorType(CurType, SizeExpr, Attr.getLoc());
if (!T.isNull())
CurType = T;
else
Attr.setInvalid();
}
/// Process the OpenCL-like ext_vector_type attribute when it occurs on
/// a type.
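///
/// For illustration (hypothetical user code, not from this file); unlike
/// vector_size, the argument is an element count:
///   typedef float float4 __attribute__((ext_vector_type(4))); // 4 x float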
static void HandleExtVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
return;
}
Expr *SizeExpr = Attr.getArgAsExpr(0);
QualType T = S.BuildExtVectorType(CurType, SizeExpr, Attr.getLoc());
if (!T.isNull())
CurType = T;
}
static bool isPermittedNeonBaseType(QualType &Ty,
VectorType::VectorKind VecKind, Sema &S) {
const BuiltinType *BTy = Ty->getAs<BuiltinType>();
if (!BTy)
return false;
llvm::Triple Triple = S.Context.getTargetInfo().getTriple();
// Signed poly is mathematically wrong, but has been baked into some ABIs by
// now.
bool IsPolyUnsigned = Triple.getArch() == llvm::Triple::aarch64 ||
Triple.getArch() == llvm::Triple::aarch64_32 ||
Triple.getArch() == llvm::Triple::aarch64_be;
if (VecKind == VectorType::NeonPolyVector) {
if (IsPolyUnsigned) {
// AArch64 polynomial vectors are unsigned.
return BTy->getKind() == BuiltinType::UChar ||
BTy->getKind() == BuiltinType::UShort ||
BTy->getKind() == BuiltinType::ULong ||
BTy->getKind() == BuiltinType::ULongLong;
} else {
// AArch32 polynomial vectors are signed.
return BTy->getKind() == BuiltinType::SChar ||
BTy->getKind() == BuiltinType::Short ||
BTy->getKind() == BuiltinType::LongLong;
}
}
// Non-polynomial vector types: the usual suspects are allowed, as well as
// float64_t on AArch64.
if ((Triple.isArch64Bit() || Triple.getArch() == llvm::Triple::aarch64_32) &&
BTy->getKind() == BuiltinType::Double)
return true;
return BTy->getKind() == BuiltinType::SChar ||
BTy->getKind() == BuiltinType::UChar ||
BTy->getKind() == BuiltinType::Short ||
BTy->getKind() == BuiltinType::UShort ||
BTy->getKind() == BuiltinType::Int ||
BTy->getKind() == BuiltinType::UInt ||
BTy->getKind() == BuiltinType::Long ||
BTy->getKind() == BuiltinType::ULong ||
BTy->getKind() == BuiltinType::LongLong ||
BTy->getKind() == BuiltinType::ULongLong ||
BTy->getKind() == BuiltinType::Float ||
BTy->getKind() == BuiltinType::Half ||
BTy->getKind() == BuiltinType::BFloat16;
}
static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr,
llvm::APSInt &Result) {
const auto *AttrExpr = Attr.getArgAsExpr(0);
if (!AttrExpr->isTypeDependent()) {
if (Optional<llvm::APSInt> Res =
AttrExpr->getIntegerConstantExpr(S.Context)) {
Result = *Res;
return true;
}
}
S.Diag(Attr.getLoc(), diag::err_attribute_argument_type)
<< Attr << AANT_ArgumentIntegerConstant << AttrExpr->getSourceRange();
Attr.setInvalid();
return false;
}
/// HandleNeonVectorTypeAttr - The "neon_vector_type" and
/// "neon_polyvector_type" attributes are used to create vector types that
/// are mangled according to ARM's ABI. Otherwise, these types are identical
/// to those created with the "vector_size" attribute. Unlike "vector_size"
/// the argument to these Neon attributes is the number of vector elements,
/// not the vector size in bytes. The vector width and element type must
/// match one of the standard Neon vector types.
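///
/// For illustration (hypothetical user code, not from this file):
///   typedef __attribute__((neon_vector_type(4))) int32_t int32x4_t; // 128-bit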
static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S, VectorType::VectorKind VecKind) {
// Target must have NEON (or MVE, whose vectors are similar enough
// not to need a separate attribute).
if (!S.Context.getTargetInfo().hasFeature("neon") &&
!S.Context.getTargetInfo().hasFeature("mve")) {
S.Diag(Attr.getLoc(), diag::err_attribute_unsupported)
<< Attr << "'neon' or 'mve'";
Attr.setInvalid();
return;
}
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr
<< 1;
Attr.setInvalid();
return;
}
// The number of elements must be an ICE.
llvm::APSInt numEltsInt(32);
if (!verifyValidIntegerConstantExpr(S, Attr, numEltsInt))
return;
// Only certain element types are supported for Neon vectors.
if (!isPermittedNeonBaseType(CurType, VecKind, S)) {
S.Diag(Attr.getLoc(), diag::err_attribute_invalid_vector_type) << CurType;
Attr.setInvalid();
return;
}
// The total size of the vector must be 64 or 128 bits.
unsigned typeSize = static_cast<unsigned>(S.Context.getTypeSize(CurType));
unsigned numElts = static_cast<unsigned>(numEltsInt.getZExtValue());
unsigned vecSize = typeSize * numElts;
if (vecSize != 64 && vecSize != 128) {
S.Diag(Attr.getLoc(), diag::err_attribute_bad_neon_vector_size) << CurType;
Attr.setInvalid();
return;
}
CurType = S.Context.getVectorType(CurType, numElts, VecKind);
}
/// HandleArmSveVectorBitsTypeAttr - The "arm_sve_vector_bits" attribute is
/// used to create fixed-length versions of sizeless SVE types defined by
/// the ACLE, such as svint32_t and svbool_t.
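///
/// For illustration (hypothetical user code, not from this file), compiled
/// with -msve-vector-bits=512:
///   typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));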
static void HandleArmSveVectorBitsTypeAttr(QualType &CurType, ParsedAttr &Attr,
Sema &S) {
// Target must have SVE.
if (!S.Context.getTargetInfo().hasFeature("sve")) {
S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr << "'sve'";
Attr.setInvalid();
return;
}
// Attribute is unsupported if '-msve-vector-bits=<bits>' isn't specified, or
// if <bits>+ syntax is used.
if (!S.getLangOpts().VScaleMin ||
S.getLangOpts().VScaleMin != S.getLangOpts().VScaleMax) {
S.Diag(Attr.getLoc(), diag::err_attribute_arm_feature_sve_bits_unsupported)
<< Attr;
Attr.setInvalid();
return;
}
// Check the attribute arguments.
if (Attr.getNumArgs() != 1) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
<< Attr << 1;
Attr.setInvalid();
return;
}
// The vector size must be an integer constant expression.
llvm::APSInt SveVectorSizeInBits(32);
if (!verifyValidIntegerConstantExpr(S, Attr, SveVectorSizeInBits))
return;
unsigned VecSize = static_cast<unsigned>(SveVectorSizeInBits.getZExtValue());
// The attribute vector size must match -msve-vector-bits.
if (VecSize != S.getLangOpts().VScaleMin * 128) {
S.Diag(Attr.getLoc(), diag::err_attribute_bad_sve_vector_size)
<< VecSize << S.getLangOpts().VScaleMin * 128;
Attr.setInvalid();
return;
}
// Attribute can only be attached to a single SVE vector or predicate type.
if (!CurType->isVLSTBuiltinType()) {
S.Diag(Attr.getLoc(), diag::err_attribute_invalid_sve_type)
<< Attr << CurType;
Attr.setInvalid();
return;
}
const auto *BT = CurType->castAs<BuiltinType>();
QualType EltType = CurType->getSveEltType(S.Context);
unsigned TypeSize = S.Context.getTypeSize(EltType);
VectorType::VectorKind VecKind = VectorType::SveFixedLengthDataVector;
if (BT->getKind() == BuiltinType::SveBool) {
// Predicates are represented as i8.
VecSize /= S.Context.getCharWidth() * S.Context.getCharWidth();
VecKind = VectorType::SveFixedLengthPredicateVector;
} else
VecSize /= TypeSize;
CurType = S.Context.getVectorType(EltType, VecSize, VecKind);
}
static void HandleArmMveStrictPolymorphismAttr(TypeProcessingState &State,
QualType &CurType,
ParsedAttr &Attr) {
const VectorType *VT = dyn_cast<VectorType>(CurType);
if (!VT || VT->getVectorKind() != VectorType::NeonVector) {
State.getSema().Diag(Attr.getLoc(),
diag::err_attribute_arm_mve_polymorphism);
Attr.setInvalid();
return;
}
CurType =
State.getAttributedType(createSimpleAttr<ArmMveStrictPolymorphismAttr>(
State.getSema().Context, Attr),
CurType, CurType);
}
/// Handle OpenCL Access Qualifier Attribute.
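///
/// For illustration (hypothetical OpenCL user code, not from this file):
///   kernel void k(read_only image2d_t src, write_only image2d_t dst);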
static void HandleOpenCLAccessAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
// OpenCL v2.0 s6.6 - Access qualifiers can be used only for image and pipe types.
if (!(CurType->isImageType() || CurType->isPipeType())) {
S.Diag(Attr.getLoc(), diag::err_opencl_invalid_access_qualifier);
Attr.setInvalid();
return;
}
if (const TypedefType* TypedefTy = CurType->getAs<TypedefType>()) {
QualType BaseTy = TypedefTy->desugar();
std::string PrevAccessQual;
if (BaseTy->isPipeType()) {
if (TypedefTy->getDecl()->hasAttr<OpenCLAccessAttr>()) {
OpenCLAccessAttr *Attr =
TypedefTy->getDecl()->getAttr<OpenCLAccessAttr>();
PrevAccessQual = Attr->getSpelling();
} else {
PrevAccessQual = "read_only";
}
} else if (const BuiltinType* ImgType = BaseTy->getAs<BuiltinType>()) {
switch (ImgType->getKind()) {
#define IMAGE_TYPE(ImgType, Id, SingletonId, Access, Suffix) \
case BuiltinType::Id: \
PrevAccessQual = #Access; \
break;
#include "clang/Basic/OpenCLImageTypes.def"
default:
llvm_unreachable("Unable to find corresponding image type.");
}
} else {
llvm_unreachable("unexpected type");
}
StringRef AttrName = Attr.getAttrName()->getName();
if (PrevAccessQual == AttrName.ltrim("_")) {
// Duplicated qualifiers
S.Diag(Attr.getLoc(), diag::warn_duplicate_declspec)
<< AttrName << Attr.getRange();
} else {
// Contradicting qualifiers
S.Diag(Attr.getLoc(), diag::err_opencl_multiple_access_qualifiers);
}
S.Diag(TypedefTy->getDecl()->getBeginLoc(),
diag::note_opencl_typedef_access_qualifier) << PrevAccessQual;
} else if (CurType->isPipeType()) {
if (Attr.getSemanticSpelling() == OpenCLAccessAttr::Keyword_write_only) {
QualType ElemType = CurType->castAs<PipeType>()->getElementType();
CurType = S.Context.getWritePipeType(ElemType);
}
}
}
/// HandleMatrixTypeAttr - Process the "matrix_type" attribute, which, much
/// like ext_vector_type, builds a new type from an element type and a size
/// (here: row and column counts).
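///
/// For illustration (hypothetical user code, not from this file), compiled
/// with -fenable-matrix:
///   typedef float m4x4_t __attribute__((matrix_type(4, 4))); // 4x4 floats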
static void HandleMatrixTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S) {
if (!S.getLangOpts().MatrixTypes) {
S.Diag(Attr.getLoc(), diag::err_builtin_matrix_disabled);
return;
}
if (Attr.getNumArgs() != 2) {
S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
<< Attr << 2;
return;
}
Expr *RowsExpr = Attr.getArgAsExpr(0);
Expr *ColsExpr = Attr.getArgAsExpr(1);
QualType T = S.BuildMatrixType(CurType, RowsExpr, ColsExpr, Attr.getLoc());
if (!T.isNull())
CurType = T;
}
static void HandleAnnotateTypeAttr(TypeProcessingState &State,
QualType &CurType, const ParsedAttr &PA) {
Sema &S = State.getSema();
if (PA.getNumArgs() < 1) {
S.Diag(PA.getLoc(), diag::err_attribute_too_few_arguments) << PA << 1;
return;
}
// Make sure that there is a string literal as the annotation's first
// argument.
StringRef Str;
if (!S.checkStringLiteralArgumentAttr(PA, 0, Str))
return;
llvm::SmallVector<Expr *, 4> Args;
Args.reserve(PA.getNumArgs() - 1);
for (unsigned Idx = 1; Idx < PA.getNumArgs(); Idx++) {
assert(!PA.isArgIdent(Idx));
Args.push_back(PA.getArgAsExpr(Idx));
}
if (!S.ConstantFoldAttrArgs(PA, Args))
return;
auto *AnnotateTypeAttr =
AnnotateTypeAttr::Create(S.Context, Str, Args.data(), Args.size(), PA);
CurType = State.getAttributedType(AnnotateTypeAttr, CurType, CurType);
}
static void HandleLifetimeBoundAttr(TypeProcessingState &State,
QualType &CurType,
ParsedAttr &Attr) {
if (State.getDeclarator().isDeclarationOfFunction()) {
CurType = State.getAttributedType(
createSimpleAttr<LifetimeBoundAttr>(State.getSema().Context, Attr),
CurType, CurType);
}
}
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
const ParsedAttributesView &attrs) {
state.setParsedNoDeref(false);
if (attrs.empty())
return;
// Scan through and apply attributes to this type where it makes sense. Some
// attributes (such as __address_space__, __vector_size__, etc) apply to the
// type, but others can be present in the type specifiers even though they
// apply to the decl. Here we apply type attributes and ignore the rest.
// This loop modifies the list pretty frequently, but we still need to make
// sure we visit every element once. Copy the attributes list, and iterate
// over that.
ParsedAttributesView AttrsCopy{attrs};
for (ParsedAttr &attr : AttrsCopy) {
// Skip attributes that were marked to be invalid.
if (attr.isInvalid())
continue;
if (attr.isStandardAttributeSyntax()) {
// [[gnu::...]] attributes are treated as declaration attributes, so may
// not appertain to a DeclaratorChunk. If we handle them as type
// attributes, accept them in that position and diagnose the GCC
// incompatibility.
if (attr.isGNUScope()) {
bool IsTypeAttr = attr.isTypeAttr();
if (TAL == TAL_DeclChunk) {
state.getSema().Diag(attr.getLoc(),
IsTypeAttr
? diag::warn_gcc_ignores_type_attr
: diag::warn_cxx11_gnu_attribute_on_type)
<< attr;
if (!IsTypeAttr)
continue;
}
} else if (TAL != TAL_DeclSpec && TAL != TAL_DeclChunk &&
!attr.isTypeAttr()) {
// Otherwise, only consider type processing for a C++11 attribute if
// - it has actually been applied to a type (decl-specifier-seq or
// declarator chunk), or
// - it is a type attribute, irrespective of where it was applied (so
// that we can support the legacy behavior of some type attributes
// that can be applied to the declaration name).
continue;
}
}
// If this is an attribute we can handle, do so now;
// otherwise, add it to the FnAttrs list for rechaining.
switch (attr.getKind()) {
default:
// A [[]] attribute on a declarator chunk must appertain to a type.
if (attr.isStandardAttributeSyntax() && TAL == TAL_DeclChunk) {
state.getSema().Diag(attr.getLoc(), diag::err_attribute_not_type_attr)
<< attr;
attr.setUsedAsTypeAttr();
}
break;
case ParsedAttr::UnknownAttribute:
if (attr.isStandardAttributeSyntax()) {
state.getSema().Diag(attr.getLoc(),
diag::warn_unknown_attribute_ignored)
<< attr << attr.getRange();
// Mark the attribute as invalid so we don't emit the same diagnostic
// multiple times.
attr.setInvalid();
}
break;
case ParsedAttr::IgnoredAttribute:
break;
case ParsedAttr::AT_BTFTypeTag:
HandleBTFTypeTagAttribute(type, attr, state);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_MayAlias:
// FIXME: This attribute needs to actually be handled, but if we ignore
// it, it breaks large amounts of Linux software.
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_OpenCLPrivateAddressSpace:
case ParsedAttr::AT_OpenCLGlobalAddressSpace:
case ParsedAttr::AT_OpenCLGlobalDeviceAddressSpace:
case ParsedAttr::AT_OpenCLGlobalHostAddressSpace:
case ParsedAttr::AT_OpenCLLocalAddressSpace:
case ParsedAttr::AT_OpenCLConstantAddressSpace:
case ParsedAttr::AT_OpenCLGenericAddressSpace:
case ParsedAttr::AT_AddressSpace:
HandleAddressSpaceTypeAttribute(type, attr, state);
attr.setUsedAsTypeAttr();
break;
OBJC_POINTER_TYPE_ATTRS_CASELIST:
if (!handleObjCPointerTypeAttr(state, attr, type))
distributeObjCPointerTypeAttr(state, attr, type);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_VectorSize:
HandleVectorSizeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_ExtVectorType:
HandleExtVectorTypeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_NeonVectorType:
HandleNeonVectorTypeAttr(type, attr, state.getSema(),
VectorType::NeonVector);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_NeonPolyVectorType:
HandleNeonVectorTypeAttr(type, attr, state.getSema(),
VectorType::NeonPolyVector);
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_ArmSveVectorBits:
HandleArmSveVectorBitsTypeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_ArmMveStrictPolymorphism: {
HandleArmMveStrictPolymorphismAttr(state, type, attr);
attr.setUsedAsTypeAttr();
break;
}
case ParsedAttr::AT_OpenCLAccess:
HandleOpenCLAccessAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
case ParsedAttr::AT_LifetimeBound:
if (TAL == TAL_DeclChunk)
HandleLifetimeBoundAttr(state, type, attr);
break;
case ParsedAttr::AT_NoDeref: {
// FIXME: `noderef` currently doesn't work correctly in [[]] syntax.
// See https://github.com/llvm/llvm-project/issues/55790 for details.
// For the time being, we simply emit a warning that the attribute is
// ignored.
if (attr.isStandardAttributeSyntax()) {
state.getSema().Diag(attr.getLoc(), diag::warn_attribute_ignored)
<< attr;
break;
}
ASTContext &Ctx = state.getSema().Context;
type = state.getAttributedType(createSimpleAttr<NoDerefAttr>(Ctx, attr),
type, type);
attr.setUsedAsTypeAttr();
state.setParsedNoDeref(true);
break;
}
case ParsedAttr::AT_MatrixType:
HandleMatrixTypeAttr(type, attr, state.getSema());
attr.setUsedAsTypeAttr();
break;
MS_TYPE_ATTRS_CASELIST:
if (!handleMSPointerTypeQualifierAttr(state, attr, type))
attr.setUsedAsTypeAttr();
break;
NULLABILITY_TYPE_ATTRS_CASELIST:
// Either add nullability here or try to distribute it. We
// don't want to distribute the nullability specifier past any
// dependent type, because that complicates the user model.
if (type->canHaveNullability() || type->isDependentType() ||
type->isArrayType() ||
!distributeNullabilityTypeAttr(state, type, attr)) {
unsigned endIndex;
if (TAL == TAL_DeclChunk)
endIndex = state.getCurrentChunkIndex();
else
endIndex = state.getDeclarator().getNumTypeObjects();
bool allowOnArrayType =
state.getDeclarator().isPrototypeContext() &&
!hasOuterPointerLikeChunk(state.getDeclarator(), endIndex);
if (checkNullabilityTypeSpecifier(
state,
type,
attr,
allowOnArrayType)) {
attr.setInvalid();
}
attr.setUsedAsTypeAttr();
}
break;
case ParsedAttr::AT_ObjCKindOf:
// '__kindof' must be part of the decl-specifiers.
switch (TAL) {
case TAL_DeclSpec:
break;
case TAL_DeclChunk:
case TAL_DeclName:
state.getSema().Diag(attr.getLoc(),
diag::err_objc_kindof_wrong_position)
<< FixItHint::CreateRemoval(attr.getLoc())
<< FixItHint::CreateInsertion(
state.getDeclarator().getDeclSpec().getBeginLoc(),
"__kindof ");
break;
}
// Apply it regardless.
if (checkObjCKindOfType(state, type, attr))
attr.setInvalid();
break;
case ParsedAttr::AT_NoThrow:
// Exception Specifications aren't generally supported in C mode throughout
// clang, so revert to attribute-based handling for C.
if (!state.getSema().getLangOpts().CPlusPlus)
break;
LLVM_FALLTHROUGH;
FUNCTION_TYPE_ATTRS_CASELIST:
attr.setUsedAsTypeAttr();
// Attributes with standard syntax have strict rules for what they
// appertain to and hence should not use the "distribution" logic below.
if (attr.isStandardAttributeSyntax()) {
if (!handleFunctionTypeAttr(state, attr, type)) {
diagnoseBadTypeAttribute(state.getSema(), attr, type);
attr.setInvalid();
}
break;
}
// Never process function type attributes as part of the
// declaration-specifiers.
if (TAL == TAL_DeclSpec)
distributeFunctionTypeAttrFromDeclSpec(state, attr, type);
// Otherwise, handle the possible delays.
else if (!handleFunctionTypeAttr(state, attr, type))
distributeFunctionTypeAttr(state, attr, type);
break;
case ParsedAttr::AT_AcquireHandle: {
if (!type->isFunctionType())
return;
if (attr.getNumArgs() != 1) {
state.getSema().Diag(attr.getLoc(),
diag::err_attribute_wrong_number_arguments)
<< attr << 1;
attr.setInvalid();
return;
}
StringRef HandleType;
if (!state.getSema().checkStringLiteralArgumentAttr(attr, 0, HandleType))
return;
type = state.getAttributedType(
AcquireHandleAttr::Create(state.getSema().Context, HandleType, attr),
type, type);
attr.setUsedAsTypeAttr();
break;
}
case ParsedAttr::AT_AnnotateType: {
HandleAnnotateTypeAttr(state, type, attr);
attr.setUsedAsTypeAttr();
break;
}
}
// Handle attributes that are defined in a macro. We do not want this to be
// applied to ObjC builtin attributes.
if (isa<AttributedType>(type) && attr.hasMacroIdentifier() &&
!type.getQualifiers().hasObjCLifetime() &&
!type.getQualifiers().hasObjCGCAttr() &&
attr.getKind() != ParsedAttr::AT_ObjCGC &&
attr.getKind() != ParsedAttr::AT_ObjCOwnership) {
const IdentifierInfo *MacroII = attr.getMacroIdentifier();
type = state.getSema().Context.getMacroQualifiedType(type, MacroII);
state.setExpansionLocForMacroQualifiedType(
cast<MacroQualifiedType>(type.getTypePtr()),
attr.getMacroExpansionLoc());
}
}
}
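// For illustration, a few source spellings routed through the dispatch above
// (not exhaustive); each is a *type* attribute, so it is applied here rather
// than to the enclosing declaration:
//
//   typedef int v4si __attribute__((vector_size(16)));  // AT_VectorSize
//   __attribute__((address_space(3))) int *asp;         // AT_AddressSpace
//   int *_Nonnull p;                                     // nullability caselist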
void Sema::completeExprArrayBound(Expr *E) {
if (DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E->IgnoreParens())) {
if (VarDecl *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
if (isTemplateInstantiation(Var->getTemplateSpecializationKind())) {
auto *Def = Var->getDefinition();
if (!Def) {
SourceLocation PointOfInstantiation = E->getExprLoc();
runWithSufficientStackSpace(PointOfInstantiation, [&] {
InstantiateVariableDefinition(PointOfInstantiation, Var);
});
Def = Var->getDefinition();
// If we don't already have a point of instantiation, and we managed
// to instantiate a definition, this is the point of instantiation.
// Otherwise, we don't request an end-of-TU instantiation, so this is
// not a point of instantiation.
// FIXME: Is this really the right behavior?
if (Var->getPointOfInstantiation().isInvalid() && Def) {
assert(Var->getTemplateSpecializationKind() ==
TSK_ImplicitInstantiation &&
"explicit instantiation with no point of instantiation");
Var->setTemplateSpecializationKind(
Var->getTemplateSpecializationKind(), PointOfInstantiation);
}
}
// Update the type to the definition's type both here and within the
// expression.
if (Def) {
DRE->setDecl(Def);
QualType T = Def->getType();
DRE->setType(T);
// FIXME: Update the type on all intervening expressions.
E->setType(T);
}
// We still go on to try to complete the type independently, as it
// may also require instantiations or diagnostics if it remains
// incomplete.
}
}
}
}
QualType Sema::getCompletedType(Expr *E) {
// Incomplete array types may be completed by the initializer attached to
// their definitions. For static data members of class templates and for
// variable templates, we need to instantiate the definition to get this
// initializer and complete the type.
if (E->getType()->isIncompleteArrayType())
completeExprArrayBound(E);
// FIXME: Are there other cases which require instantiating something other
// than the type to complete the type of an expression?
return E->getType();
}
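// For illustration, the template case the two functions above handle: the
// declared type of a static data member can be an incomplete array that is
// completed by instantiating its definition.
//
//   template <typename T> struct Table {
//     static const int values[];        // incomplete type: const int[]
//   };
//   template <typename T> const int Table<T>::values[] = {1, 2, 3};
//
//   // sizeof requires a complete type, so this instantiates the definition
//   // above and completes the bound to const int[3].
//   enum { N = sizeof(Table<int>::values) / sizeof(int) };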
/// Ensure that the type of the given expression is complete.
///
/// This routine checks whether the expression \p E has a complete type. If the
/// expression refers to an instantiable construct, that instantiation is
/// performed as needed to complete its type. Furthermore,
/// Sema::RequireCompleteType is called for the expression's type (or in the
/// case of a reference type, the referred-to type).
///
/// \param E The expression whose type is required to be complete.
/// \param Kind Selects which completeness rules should be applied.
/// \param Diagnoser The object that will emit a diagnostic if the type is
/// incomplete.
///
/// \returns \c true if the type of \p E is incomplete and diagnosed, \c false
/// otherwise.
bool Sema::RequireCompleteExprType(Expr *E, CompleteTypeKind Kind,
TypeDiagnoser &Diagnoser) {
return RequireCompleteType(E->getExprLoc(), getCompletedType(E), Kind,
Diagnoser);
}
bool Sema::RequireCompleteExprType(Expr *E, unsigned DiagID) {
BoundTypeDiagnoser<> Diagnoser(DiagID);
return RequireCompleteExprType(E, CompleteTypeKind::Default, Diagnoser);
}
/// Ensure that the type T is a complete type.
///
/// This routine checks whether the type @p T is complete in any
/// context where a complete type is required. If @p T is a complete
/// type, returns false. If @p T is a class template specialization,
/// this routine then attempts to perform class template
/// instantiation. If instantiation fails, or if @p T is incomplete
/// and cannot be completed, issues the diagnostic @p diag (giving it
/// the type @p T) and returns true.
///
/// @param Loc The location in the source that the incomplete type
/// diagnostic should refer to.
///
/// @param T The type that this routine is examining for completeness.
///
/// @param Kind Selects which completeness rules should be applied.
///
/// @returns @c true if @p T is incomplete and a diagnostic was emitted,
/// @c false otherwise.
bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
CompleteTypeKind Kind,
TypeDiagnoser &Diagnoser) {
if (RequireCompleteTypeImpl(Loc, T, Kind, &Diagnoser))
return true;
if (const TagType *Tag = T->getAs<TagType>()) {
if (!Tag->getDecl()->isCompleteDefinitionRequired()) {
Tag->getDecl()->setCompleteDefinitionRequired();
Consumer.HandleTagDeclRequiredDefinition(Tag->getDecl());
}
}
return false;
}
bool Sema::hasStructuralCompatLayout(Decl *D, Decl *Suggested) {
llvm::DenseSet<std::pair<Decl *, Decl *>> NonEquivalentDecls;
if (!Suggested)
return false;
// FIXME: Add a specific mode for C11 6.2.7/1 in StructuralEquivalenceContext
// and isolate from other C++ specific checks.
StructuralEquivalenceContext Ctx(
D->getASTContext(), Suggested->getASTContext(), NonEquivalentDecls,
StructuralEquivalenceKind::Default,
false /*StrictTypeSpelling*/, true /*Complain*/,
true /*ErrorOnTagTypeMismatch*/);
return Ctx.IsEquivalent(D, Suggested);
}
bool Sema::hasAcceptableDefinition(NamedDecl *D, NamedDecl **Suggested,
AcceptableKind Kind, bool OnlyNeedComplete) {
// Easy case: if we don't have modules, all declarations are visible.
if (!getLangOpts().Modules && !getLangOpts().ModulesLocalVisibility)
return true;
// If this definition was instantiated from a template, map back to the
// pattern from which it was instantiated.
if (isa<TagDecl>(D) && cast<TagDecl>(D)->isBeingDefined()) {
// We're in the middle of defining it; this definition should be treated
// as visible.
return true;
} else if (auto *RD = dyn_cast<CXXRecordDecl>(D)) {
if (auto *Pattern = RD->getTemplateInstantiationPattern())
RD = Pattern;
D = RD->getDefinition();
} else if (auto *ED = dyn_cast<EnumDecl>(D)) {
if (auto *Pattern = ED->getTemplateInstantiationPattern())
ED = Pattern;
if (OnlyNeedComplete && (ED->isFixed() || getLangOpts().MSVCCompat)) {
// If the enum has a fixed underlying type, it may have been forward
// declared. In -fms-compatibility, `enum Foo;` will also forward declare
// the enum and assign it the underlying type of `int`. Since we're only
// looking for a complete type (not a definition), any visible declaration
// of it will do.
*Suggested = nullptr;
for (auto *Redecl : ED->redecls()) {
if (isAcceptable(Redecl, Kind))
return true;
if (Redecl->isThisDeclarationADefinition() ||
(Redecl->isCanonicalDecl() && !*Suggested))
*Suggested = Redecl;
}
return false;
}
D = ED->getDefinition();
} else if (auto *FD = dyn_cast<FunctionDecl>(D)) {
if (auto *Pattern = FD->getTemplateInstantiationPattern())
FD = Pattern;
D = FD->getDefinition();
} else if (auto *VD = dyn_cast<VarDecl>(D)) {
if (auto *Pattern = VD->getTemplateInstantiationPattern())
VD = Pattern;
D = VD->getDefinition();
}
assert(D && "missing definition for pattern of instantiated definition");
*Suggested = D;
auto DefinitionIsAcceptable = [&] {
// The (primary) definition might be in a visible module.
if (isAcceptable(D, Kind))
return true;
// A visible module might have a merged definition instead.
if (D->isModulePrivate() ? hasMergedDefinitionInCurrentModule(D)
: hasVisibleMergedDefinition(D)) {
if (CodeSynthesisContexts.empty() &&
!getLangOpts().ModulesLocalVisibility) {
// Cache the fact that this definition is implicitly visible because
// there is a visible merged definition.
D->setVisibleDespiteOwningModule();
}
return true;
}
return false;
};
if (DefinitionIsAcceptable())
return true;
// The external source may have additional definitions of this entity that are
// visible, so complete the redeclaration chain now and ask again.
if (auto *Source = Context.getExternalSource()) {
Source->CompleteRedeclChain(D);
return DefinitionIsAcceptable();
}
return false;
}
/// Determine whether there is any declaration of \p D that was ever a
/// definition (perhaps before module merging) and is currently visible.
/// \param D The definition of the entity.
/// \param Suggested Filled in with the declaration that should be made visible
/// in order to provide a definition of this entity.
/// \param OnlyNeedComplete If \c true, we only need the type to be complete,
/// not defined. This only matters for enums with a fixed underlying
/// type, since in all other cases, a type is complete if and only if it
/// is defined.
bool Sema::hasVisibleDefinition(NamedDecl *D, NamedDecl **Suggested,
bool OnlyNeedComplete) {
return hasAcceptableDefinition(D, Suggested, Sema::AcceptableKind::Visible,
OnlyNeedComplete);
}
/// Determine whether there is any declaration of \p D that was ever a
/// definition (perhaps before module merging) and is currently
/// reachable.
/// \param D The definition of the entity.
/// \param Suggested Filled in with the declaration that should be made
/// reachable
/// in order to provide a definition of this entity.
/// \param OnlyNeedComplete If \c true, we only need the type to be complete,
/// not defined. This only matters for enums with a fixed underlying
/// type, since in all other cases, a type is complete if and only if it
/// is defined.
bool Sema::hasReachableDefinition(NamedDecl *D, NamedDecl **Suggested,
bool OnlyNeedComplete) {
return hasAcceptableDefinition(D, Suggested, Sema::AcceptableKind::Reachable,
OnlyNeedComplete);
}
/// Locks in the inheritance model for the given class and all of its bases.
static void assignInheritanceModel(Sema &S, CXXRecordDecl *RD) {
RD = RD->getMostRecentNonInjectedDecl();
if (!RD->hasAttr<MSInheritanceAttr>()) {
MSInheritanceModel IM;
bool BestCase = false;
switch (S.MSPointerToMemberRepresentationMethod) {
case LangOptions::PPTMK_BestCase:
BestCase = true;
IM = RD->calculateInheritanceModel();
break;
case LangOptions::PPTMK_FullGeneralitySingleInheritance:
IM = MSInheritanceModel::Single;
break;
case LangOptions::PPTMK_FullGeneralityMultipleInheritance:
IM = MSInheritanceModel::Multiple;
break;
case LangOptions::PPTMK_FullGeneralityVirtualInheritance:
IM = MSInheritanceModel::Unspecified;
break;
}
SourceRange Loc = S.ImplicitMSInheritanceAttrLoc.isValid()
? S.ImplicitMSInheritanceAttrLoc
: RD->getSourceRange();
RD->addAttr(MSInheritanceAttr::CreateImplicit(
S.getASTContext(), BestCase, Loc, AttributeCommonInfo::AS_Microsoft,
MSInheritanceAttr::Spelling(IM)));
S.Consumer.AssignInheritanceModel(RD);
}
}
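// For illustration (Microsoft C++ ABI only): the inheritance model fixes the
// size of a pointer-to-member, so it is locked in no later than the first
// point where such a type must be complete.
//
//   struct __single_inheritance S;  // model locked explicitly here
//   int S::*pm = nullptr;           // sizeof(pm) is now fixed for S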
/// The implementation of RequireCompleteType.
bool Sema::RequireCompleteTypeImpl(SourceLocation Loc, QualType T,
CompleteTypeKind Kind,
TypeDiagnoser *Diagnoser) {
// FIXME: Add this assertion to make sure we always get instantiation points.
// assert(!Loc.isInvalid() && "Invalid location in RequireCompleteType");
// FIXME: Add this assertion to help us flush out problems with
// checking for dependent types and type-dependent expressions.
//
// assert(!T->isDependentType() &&
// "Can't ask whether a dependent type is complete");
if (const MemberPointerType *MPTy = T->getAs<MemberPointerType>()) {
if (!MPTy->getClass()->isDependentType()) {
if (getLangOpts().CompleteMemberPointers &&
!MPTy->getClass()->getAsCXXRecordDecl()->isBeingDefined() &&
RequireCompleteType(Loc, QualType(MPTy->getClass(), 0), Kind,
diag::err_memptr_incomplete))
return true;
// We lock in the inheritance model once somebody has asked us to ensure
// that a pointer-to-member type is complete.
if (Context.getTargetInfo().getCXXABI().isMicrosoft()) {
(void)isCompleteType(Loc, QualType(MPTy->getClass(), 0));
assignInheritanceModel(*this, MPTy->getMostRecentCXXRecordDecl());
}
}
}
NamedDecl *Def = nullptr;
bool AcceptSizeless = (Kind == CompleteTypeKind::AcceptSizeless);
bool Incomplete = (T->isIncompleteType(&Def) ||
(!AcceptSizeless && T->isSizelessBuiltinType()));
// Check that any necessary explicit specializations are visible. For an
// enum, we just need the declaration, so don't check this.
if (Def && !isa<EnumDecl>(Def))
checkSpecializationReachability(Loc, Def);
// If we have a complete type, we're done.
if (!Incomplete) {
NamedDecl *Suggested = nullptr;
if (Def &&
!hasReachableDefinition(Def, &Suggested, /*OnlyNeedComplete=*/true)) {
// If the user is going to see an error here, recover by making the
// definition visible.
bool TreatAsComplete = Diagnoser && !isSFINAEContext();
if (Diagnoser && Suggested)
diagnoseMissingImport(Loc, Suggested, MissingImportKind::Definition,
/*Recover*/ TreatAsComplete);
return !TreatAsComplete;
} else if (Def && !TemplateInstCallbacks.empty()) {
CodeSynthesisContext TempInst;
TempInst.Kind = CodeSynthesisContext::Memoization;
TempInst.Template = Def;
TempInst.Entity = Def;
TempInst.PointOfInstantiation = Loc;
atTemplateBegin(TemplateInstCallbacks, *this, TempInst);
atTemplateEnd(TemplateInstCallbacks, *this, TempInst);
}
return false;
}
TagDecl *Tag = dyn_cast_or_null<TagDecl>(Def);
ObjCInterfaceDecl *IFace = dyn_cast_or_null<ObjCInterfaceDecl>(Def);
// Give the external source a chance to provide a definition of the type.
// This is kept separate from completing the redeclaration chain so that
// external sources such as LLDB can avoid synthesizing a type definition
// unless it's actually needed.
if (Tag || IFace) {
// Avoid diagnosing invalid decls as incomplete.
if (Def->isInvalidDecl())
return true;
// Give the external AST source a chance to complete the type.
if (auto *Source = Context.getExternalSource()) {
if (Tag && Tag->hasExternalLexicalStorage())
Source->CompleteType(Tag);
if (IFace && IFace->hasExternalLexicalStorage())
Source->CompleteType(IFace);
// If the external source completed the type, go through the motions
// again to ensure we're allowed to use the completed type.
if (!T->isIncompleteType())
return RequireCompleteTypeImpl(Loc, T, Kind, Diagnoser);
}
}
// If we have a class template specialization or a class member of a
// class template specialization, or an array with known size of such,
// try to instantiate it.
if (auto *RD = dyn_cast_or_null<CXXRecordDecl>(Tag)) {
bool Instantiated = false;
bool Diagnosed = false;
if (RD->isDependentContext()) {
// Don't try to instantiate a dependent class (e.g., a member template of
// an instantiated class template specialization).
// FIXME: Can this ever happen?
} else if (auto *ClassTemplateSpec =
dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
if (ClassTemplateSpec->getSpecializationKind() == TSK_Undeclared) {
runWithSufficientStackSpace(Loc, [&] {
Diagnosed = InstantiateClassTemplateSpecialization(
Loc, ClassTemplateSpec, TSK_ImplicitInstantiation,
/*Complain=*/Diagnoser);
});
Instantiated = true;
}
} else {
CXXRecordDecl *Pattern = RD->getInstantiatedFromMemberClass();
if (!RD->isBeingDefined() && Pattern) {
MemberSpecializationInfo *MSI = RD->getMemberSpecializationInfo();
assert(MSI && "Missing member specialization information?");
// This record was instantiated from a class within a template.
if (MSI->getTemplateSpecializationKind() !=
TSK_ExplicitSpecialization) {
runWithSufficientStackSpace(Loc, [&] {
Diagnosed = InstantiateClass(Loc, RD, Pattern,
getTemplateInstantiationArgs(RD),
TSK_ImplicitInstantiation,
/*Complain=*/Diagnoser);
});
Instantiated = true;
}
}
}
if (Instantiated) {
// Instantiate* might have already complained that the template is not
// defined, if we asked it to.
if (Diagnoser && Diagnosed)
return true;
// If we instantiated a definition, check that it's usable, even if
// instantiation produced an error, so that repeated calls to this
// function give consistent answers.
if (!T->isIncompleteType())
return RequireCompleteTypeImpl(Loc, T, Kind, Diagnoser);
}
}
// FIXME: If we didn't instantiate a definition because of an explicit
// specialization declaration, check that it's visible.
if (!Diagnoser)
return true;
Diagnoser->diagnose(*this, Loc, T);
// If the type was a forward declaration of a class/struct/union
// type, produce a note.
if (Tag && !Tag->isInvalidDecl() && !Tag->getLocation().isInvalid())
Diag(Tag->getLocation(),
Tag->isBeingDefined() ? diag::note_type_being_defined
: diag::note_forward_declaration)
<< Context.getTagDeclType(Tag);
// If the Objective-C class was a forward declaration, produce a note.
if (IFace && !IFace->isInvalidDecl() && !IFace->getLocation().isInvalid())
Diag(IFace->getLocation(), diag::note_forward_class);
// If we have external information that we can use to suggest a fix,
// produce a note.
if (ExternalSource)
ExternalSource->MaybeDiagnoseMissingCompleteType(Loc, T);
return true;
}
bool Sema::RequireCompleteType(SourceLocation Loc, QualType T,
CompleteTypeKind Kind, unsigned DiagID) {
BoundTypeDiagnoser<> Diagnoser(DiagID);
return RequireCompleteType(Loc, T, Kind, Diagnoser);
}
/// Get diagnostic %select index for tag kind for
/// literal type diagnostic message.
/// WARNING: Indexes apply to particular diagnostics only!
///
/// \returns diagnostic %select index.
static unsigned getLiteralDiagFromTagKind(TagTypeKind Tag) {
switch (Tag) {
case TTK_Struct: return 0;
case TTK_Interface: return 1;
case TTK_Class: return 2;
default: llvm_unreachable("Invalid tag kind for literal type diagnostic!");
}
}
/// Ensure that the type T is a literal type.
///
/// This routine checks whether the type @p T is a literal type. If @p T is an
/// incomplete type, an attempt is made to complete it. If @p T is a literal
/// type, or @p AllowIncompleteType is true and @p T is an incomplete type,
/// returns false. Otherwise, this routine issues the diagnostic @p PD (giving
/// it the type @p T), along with notes explaining why the type is not a
/// literal type, and returns true.
///
/// @param Loc The location in the source that the non-literal type
/// diagnostic should refer to.
///
/// @param T The type that this routine is examining for literalness.
///
/// @param Diagnoser Emits a diagnostic if T is not a literal type.
///
/// @returns @c true if @p T is not a literal type and a diagnostic was emitted,
/// @c false otherwise.
bool Sema::RequireLiteralType(SourceLocation Loc, QualType T,
TypeDiagnoser &Diagnoser) {
assert(!T->isDependentType() && "type should not be dependent");
QualType ElemType = Context.getBaseElementType(T);
if ((isCompleteType(Loc, ElemType) || ElemType->isVoidType()) &&
T->isLiteralType(Context))
return false;
Diagnoser.diagnose(*this, Loc, T);
if (T->isVariableArrayType())
return true;
const RecordType *RT = ElemType->getAs<RecordType>();
if (!RT)
return true;
const CXXRecordDecl *RD = cast<CXXRecordDecl>(RT->getDecl());
// A partially-defined class type can't be a literal type, because a literal
// class type must have a trivial destructor (which can't be checked until
// the class definition is complete).
if (RequireCompleteType(Loc, ElemType, diag::note_non_literal_incomplete, T))
return true;
// [expr.prim.lambda]p3:
// This class type is [not] a literal type.
if (RD->isLambda() && !getLangOpts().CPlusPlus17) {
Diag(RD->getLocation(), diag::note_non_literal_lambda);
return true;
}
// If the class has virtual base classes, then it's not an aggregate, and
// cannot have any constexpr constructors or a trivial default constructor,
// so is non-literal. This is better to diagnose than the resulting absence
// of constexpr constructors.
if (RD->getNumVBases()) {
Diag(RD->getLocation(), diag::note_non_literal_virtual_base)
<< getLiteralDiagFromTagKind(RD->getTagKind()) << RD->getNumVBases();
for (const auto &I : RD->vbases())
Diag(I.getBeginLoc(), diag::note_constexpr_virtual_base_here)
<< I.getSourceRange();
} else if (!RD->isAggregate() && !RD->hasConstexprNonCopyMoveConstructor() &&
!RD->hasTrivialDefaultConstructor()) {
Diag(RD->getLocation(), diag::note_non_literal_no_constexpr_ctors) << RD;
} else if (RD->hasNonLiteralTypeFieldsOrBases()) {
for (const auto &I : RD->bases()) {
if (!I.getType()->isLiteralType(Context)) {
Diag(I.getBeginLoc(), diag::note_non_literal_base_class)
<< RD << I.getType() << I.getSourceRange();
return true;
}
}
for (const auto *I : RD->fields()) {
if (!I->getType()->isLiteralType(Context) ||
I->getType().isVolatileQualified()) {
Diag(I->getLocation(), diag::note_non_literal_field)
<< RD << I << I->getType()
<< I->getType().isVolatileQualified();
return true;
}
}
} else if (getLangOpts().CPlusPlus20 ? !RD->hasConstexprDestructor()
: !RD->hasTrivialDestructor()) {
// All fields and bases are of literal types, so have trivial or constexpr
// destructors. If this class's destructor is non-trivial / non-constexpr,
// it must be user-declared.
CXXDestructorDecl *Dtor = RD->getDestructor();
assert(Dtor && "class has literal fields and bases but no dtor?");
if (!Dtor)
return true;
if (getLangOpts().CPlusPlus20) {
Diag(Dtor->getLocation(), diag::note_non_literal_non_constexpr_dtor)
<< RD;
} else {
Diag(Dtor->getLocation(), Dtor->isUserProvided()
? diag::note_non_literal_user_provided_dtor
: diag::note_non_literal_nontrivial_dtor)
<< RD;
if (!Dtor->isUserProvided())
SpecialMemberIsTrivial(Dtor, CXXDestructor, TAH_IgnoreTrivialABI,
/*Diagnose*/ true);
}
}
return true;
}
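// For illustration, a type rejected by the checks above under the pre-C++23
// rules for constexpr functions (the commented declaration is the error):
//
//   struct NonLiteral {
//     ~NonLiteral() {}              // user-provided, non-constexpr dtor
//   };
//   // constexpr NonLiteral make(); // error: non-literal return type; a note
//   //                              // points at the destructor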
bool Sema::RequireLiteralType(SourceLocation Loc, QualType T, unsigned DiagID) {
BoundTypeDiagnoser<> Diagnoser(DiagID);
return RequireLiteralType(Loc, T, Diagnoser);
}
/// Retrieve a version of the type 'T' that is elaborated by Keyword, qualified
/// by the nested-name-specifier contained in SS, and that is (re)declared by
/// OwnedTagDecl, which is nullptr if this is not a (re)declaration.
QualType Sema::getElaboratedType(ElaboratedTypeKeyword Keyword,
const CXXScopeSpec &SS, QualType T,
TagDecl *OwnedTagDecl) {
if (T.isNull())
return T;
NestedNameSpecifier *NNS;
if (SS.isValid())
NNS = SS.getScopeRep();
else {
if (Keyword == ETK_None)
return T;
NNS = nullptr;
}
return Context.getElaboratedType(Keyword, NNS, T, OwnedTagDecl);
}
QualType Sema::BuildTypeofExprType(Expr *E) {
assert(!E->hasPlaceholderType() && "unexpected placeholder");
if (!getLangOpts().CPlusPlus && E->refersToBitField())
Diag(E->getExprLoc(), diag::err_sizeof_alignof_typeof_bitfield) << 2;
if (!E->isTypeDependent()) {
QualType T = E->getType();
if (const TagType *TT = T->getAs<TagType>())
DiagnoseUseOfDecl(TT->getDecl(), E->getExprLoc());
}
return Context.getTypeOfExprType(E);
}
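// For illustration, GNU typeof (available in C++ as an extension): the
// operand is unevaluated, and in C a bit-field operand is diagnosed above.
//
//   int i = 0;
//   typeof(i + 1) j = 2;  // j has type int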
/// getDecltypeForExpr - Given an expr, will return the decltype for
/// that expression, according to the rules in C++11
/// [dcl.type.simple]p4 and C++11 [expr.lambda.prim]p18.
QualType Sema::getDecltypeForExpr(Expr *E) {
if (E->isTypeDependent())
return Context.DependentTy;
Expr *IDExpr = E;
if (auto *ImplCastExpr = dyn_cast<ImplicitCastExpr>(E))
IDExpr = ImplCastExpr->getSubExpr();
// C++11 [dcl.type.simple]p4:
// The type denoted by decltype(e) is defined as follows:
// C++20:
// - if E is an unparenthesized id-expression naming a non-type
// template-parameter (13.2), decltype(E) is the type of the
// template-parameter after performing any necessary type deduction
// Note that this does not pick up the implicit 'const' for a template
// parameter object. This rule makes no difference before C++20 so we apply
// it unconditionally.
if (const auto *SNTTPE = dyn_cast<SubstNonTypeTemplateParmExpr>(IDExpr))
return SNTTPE->getParameterType(Context);
// - if e is an unparenthesized id-expression or an unparenthesized class
// member access (5.2.5), decltype(e) is the type of the entity named
// by e. If there is no such entity, or if e names a set of overloaded
// functions, the program is ill-formed;
//
// We apply the same rules for Objective-C ivar and property references.
if (const auto *DRE = dyn_cast<DeclRefExpr>(IDExpr)) {
const ValueDecl *VD = DRE->getDecl();
QualType T = VD->getType();
return isa<TemplateParamObjectDecl>(VD) ? T.getUnqualifiedType() : T;
}
if (const auto *ME = dyn_cast<MemberExpr>(IDExpr)) {
if (const auto *VD = ME->getMemberDecl())
if (isa<FieldDecl>(VD) || isa<VarDecl>(VD))
return VD->getType();
} else if (const auto *IR = dyn_cast<ObjCIvarRefExpr>(IDExpr)) {
return IR->getDecl()->getType();
} else if (const auto *PR = dyn_cast<ObjCPropertyRefExpr>(IDExpr)) {
if (PR->isExplicitProperty())
return PR->getExplicitProperty()->getType();
} else if (const auto *PE = dyn_cast<PredefinedExpr>(IDExpr)) {
return PE->getType();
}
// C++11 [expr.lambda.prim]p18:
// Every occurrence of decltype((x)) where x is a possibly
// parenthesized id-expression that names an entity of automatic
// storage duration is treated as if x were transformed into an
// access to a corresponding data member of the closure type that
// would have been declared if x were an odr-use of the denoted
// entity.
if (getCurLambda() && isa<ParenExpr>(IDExpr)) {
if (auto *DRE = dyn_cast<DeclRefExpr>(IDExpr->IgnoreParens())) {
if (auto *Var = dyn_cast<VarDecl>(DRE->getDecl())) {
QualType T = getCapturedDeclRefType(Var, DRE->getLocation());
if (!T.isNull())
return Context.getLValueReferenceType(T);
}
}
}
return Context.getReferenceQualifiedType(E);
}
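// For illustration, the id-expression vs. parenthesized-expression split
// implemented above, per C++11 [dcl.type.simple]p4:
//
//   int x = 0;
//   using T1 = decltype(x);    // int:  unparenthesized id-expression yields
//                              //       the declared type of the entity
//   using T2 = decltype((x));  // int&: otherwise an lvalue operand yields an
//                              //       lvalue reference type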
QualType Sema::BuildDecltypeType(Expr *E, bool AsUnevaluated) {
assert(!E->hasPlaceholderType() && "unexpected placeholder");
if (AsUnevaluated && CodeSynthesisContexts.empty() &&
!E->isInstantiationDependent() && E->HasSideEffects(Context, false)) {
// The expression operand for decltype is in an unevaluated expression
// context, so side effects could result in unintended consequences.
// Exclude instantiation-dependent expressions, because 'decltype' is often
// used to build SFINAE gadgets.
Diag(E->getExprLoc(), diag::warn_side_effects_unevaluated_context);
}
return Context.getDecltypeType(E, getDecltypeForExpr(E));
}
QualType Sema::BuildUnaryTransformType(QualType BaseType,
UnaryTransformType::UTTKind UKind,
SourceLocation Loc) {
switch (UKind) {
case UnaryTransformType::EnumUnderlyingType:
if (!BaseType->isDependentType() && !BaseType->isEnumeralType()) {
Diag(Loc, diag::err_only_enums_have_underlying_types);
return QualType();
} else {
QualType Underlying = BaseType;
if (!BaseType->isDependentType()) {
// The enum could be incomplete if we're parsing its definition or
// recovering from an error.
NamedDecl *FwdDecl = nullptr;
if (BaseType->isIncompleteType(&FwdDecl)) {
Diag(Loc, diag::err_underlying_type_of_incomplete_enum) << BaseType;
Diag(FwdDecl->getLocation(), diag::note_forward_declaration) << FwdDecl;
return QualType();
}
EnumDecl *ED = BaseType->castAs<EnumType>()->getDecl();
assert(ED && "EnumType has no EnumDecl");
DiagnoseUseOfDecl(ED, Loc);
Underlying = ED->getIntegerType();
assert(!Underlying.isNull());
}
return Context.getUnaryTransformType(BaseType, Underlying,
UnaryTransformType::EnumUnderlyingType);
}
}
llvm_unreachable("unknown unary transform type");
}
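// For illustration, the builtin implemented above (used by implementations
// of std::underlying_type); an incomplete enum is rejected with
// err_underlying_type_of_incomplete_enum:
//
//   enum class E : unsigned char { A };
//   using U = __underlying_type(E);  // U is unsigned char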
QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) {
if (!isDependentOrGNUAutoType(T)) {
// FIXME: It isn't entirely clear whether incomplete atomic types
// are allowed or not; for simplicity, ban them for the moment.
if (RequireCompleteType(Loc, T, diag::err_atomic_specifier_bad_type, 0))
return QualType();
int DisallowedKind = -1;
if (T->isArrayType())
DisallowedKind = 1;
else if (T->isFunctionType())
DisallowedKind = 2;
else if (T->isReferenceType())
DisallowedKind = 3;
else if (T->isAtomicType())
DisallowedKind = 4;
else if (T.hasQualifiers())
DisallowedKind = 5;
else if (T->isSizelessType())
DisallowedKind = 6;
else if (!T.isTriviallyCopyableType(Context))
// Some other non-trivially-copyable type (probably a C++ class)
DisallowedKind = 7;
else if (T->isBitIntType())
DisallowedKind = 8;
if (DisallowedKind != -1) {
Diag(Loc, diag::err_atomic_specifier_bad_type) << DisallowedKind << T;
return QualType();
}
// FIXME: Do we need any handling for ARC here?
}
// Build the atomic type.
return Context.getAtomicType(T);
}
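// For illustration (C11 _Atomic, also accepted in C++ as a Clang extension);
// the commented lines map to DisallowedKind values checked above:
//
//   _Atomic int ai;            // OK
//   // _Atomic int bad[2];     // error: array type     (DisallowedKind 1)
//   // _Atomic(int &) badref;  // error: reference type (DisallowedKind 3)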
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 19149d079822..ab65612bce90 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1,3600 +1,3606 @@
//===- ExprEngine.cpp - Path-Sensitive Expression-Level Dataflow ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a meta-engine for path-sensitive dataflow analysis that
// is built on CoreEngine, but provides the boilerplate to execute transfer
// functions and build the ExplodedGraph at the expression level.
//
//===----------------------------------------------------------------------===//
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "PrettyStackTraceLocationContext.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/Expr.h"
#include "clang/AST/ExprCXX.h"
#include "clang/AST/ExprObjC.h"
#include "clang/AST/ParentMap.h"
#include "clang/AST/PrettyPrinter.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtCXX.h"
#include "clang/AST/StmtObjC.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/AnalysisDeclContext.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/ConstructionContext.h"
#include "clang/Analysis/ProgramPoint.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/JsonSupport.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/Specifiers.h"
#include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ConstraintManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/LoopUnrolling.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/Store.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ImmutableMap.h"
#include "llvm/ADT/ImmutableSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DOTGraphTraits.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace clang;
using namespace ento;
#define DEBUG_TYPE "ExprEngine"
STATISTIC(NumRemoveDeadBindings,
"The # of times RemoveDeadBindings is called");
STATISTIC(NumMaxBlockCountReached,
"The # of aborted paths due to reaching the maximum block count in "
"a top level function");
STATISTIC(NumMaxBlockCountReachedInInlined,
"The # of aborted paths due to reaching the maximum block count in "
"an inlined function");
STATISTIC(NumTimesRetriedWithoutInlining,
"The # of times we re-evaluated a call without inlining");
//===----------------------------------------------------------------------===//
// Internal program state traits.
//===----------------------------------------------------------------------===//
namespace {
// When modeling a C++ constructor, for a variety of reasons we need to track
// the location of the object for the duration of its ConstructionContext.
// ObjectsUnderConstruction maps statements within the construction context
// to the object's location, so that on every such statement the location
// can be retrieved.
/// ConstructedObjectKey is used to find the path-sensitive
/// memory region of a freshly constructed object while modeling the AST node
/// that syntactically represents the object that is being constructed.
/// Semantics of such nodes may sometimes require access to the region that's
/// not otherwise present in the program state, or to the very fact that
/// the construction context was present and contained references to these
/// AST nodes.
class ConstructedObjectKey {
using ConstructedObjectKeyImpl =
std::pair<ConstructionContextItem, const LocationContext *>;
const ConstructedObjectKeyImpl Impl;
public:
explicit ConstructedObjectKey(const ConstructionContextItem &Item,
const LocationContext *LC)
: Impl(Item, LC) {}
const ConstructionContextItem &getItem() const { return Impl.first; }
const LocationContext *getLocationContext() const { return Impl.second; }
ASTContext &getASTContext() const {
return getLocationContext()->getDecl()->getASTContext();
}
void printJson(llvm::raw_ostream &Out, PrinterHelper *Helper,
PrintingPolicy &PP) const {
const Stmt *S = getItem().getStmtOrNull();
const CXXCtorInitializer *I = nullptr;
if (!S)
I = getItem().getCXXCtorInitializer();
if (S)
Out << "\"stmt_id\": " << S->getID(getASTContext());
else
Out << "\"init_id\": " << I->getID(getASTContext());
// Kind
Out << ", \"kind\": \"" << getItem().getKindAsString()
<< "\", \"argument_index\": ";
if (getItem().getKind() == ConstructionContextItem::ArgumentKind)
Out << getItem().getIndex();
else
Out << "null";
// Pretty-print
Out << ", \"pretty\": ";
if (S) {
S->printJson(Out, Helper, PP, /*AddQuotes=*/true);
} else {
Out << '\"' << I->getAnyMember()->getDeclName() << '\"';
}
}
void Profile(llvm::FoldingSetNodeID &ID) const {
ID.Add(Impl.first);
ID.AddPointer(Impl.second);
}
bool operator==(const ConstructedObjectKey &RHS) const {
return Impl == RHS.Impl;
}
bool operator<(const ConstructedObjectKey &RHS) const {
return Impl < RHS.Impl;
}
};
} // namespace
typedef llvm::ImmutableMap<ConstructedObjectKey, SVal>
ObjectsUnderConstructionMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(ObjectsUnderConstruction,
ObjectsUnderConstructionMap)
// This trait is responsible for storing the index of the element that is to be
// constructed in the next iteration. As a result a CXXConstructExpr is only
// stored if it is of array type. Also, the index is the index of the contiguous
// memory region, which is important for multi-dimensional arrays. E.g., int
// arr[2][2]; assume arr[1][1] will be the next element under construction, so
// the index is 3.
typedef llvm::ImmutableMap<
std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned>
IndexOfElementToConstructMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(IndexOfElementToConstruct,
IndexOfElementToConstructMap)
// This trait is responsible for holding our pending ArrayInitLoopExprs.
// It pairs the LocationContext and the initializer CXXConstructExpr with
// the size of the array that's being copy-initialized.
typedef llvm::ImmutableMap<
std::pair<const CXXConstructExpr *, const LocationContext *>, unsigned>
PendingInitLoopMap;
REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingInitLoop, PendingInitLoopMap)
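// For illustration: an ArrayInitLoopExpr arises, for example, when an array
// is captured by value, and the copy is modeled element by element using the
// flattened index convention described above (for int arr[2][2], arr[i][j]
// sits at contiguous index i * 2 + j).
//
//   int arr[2][2];
//   auto l = [arr] {};  // copy-initializes all four captured elements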
//===----------------------------------------------------------------------===//
// Engine construction and deletion.
//===----------------------------------------------------------------------===//
static const char* TagProviderName = "ExprEngine";
ExprEngine::ExprEngine(cross_tu::CrossTranslationUnitContext &CTU,
AnalysisManager &mgr, SetOfConstDecls *VisitedCalleesIn,
FunctionSummariesTy *FS, InliningModes HowToInlineIn)
: CTU(CTU), IsCTUEnabled(mgr.getAnalyzerOptions().IsNaiveCTUEnabled),
AMgr(mgr), AnalysisDeclContexts(mgr.getAnalysisDeclContextManager()),
Engine(*this, FS, mgr.getAnalyzerOptions()), G(Engine.getGraph()),
StateMgr(getContext(), mgr.getStoreManagerCreator(),
mgr.getConstraintManagerCreator(), G.getAllocator(), this),
SymMgr(StateMgr.getSymbolManager()), MRMgr(StateMgr.getRegionManager()),
svalBuilder(StateMgr.getSValBuilder()), ObjCNoRet(mgr.getASTContext()),
BR(mgr, *this), VisitedCallees(VisitedCalleesIn),
HowToInline(HowToInlineIn) {
unsigned TrimInterval = mgr.options.GraphTrimInterval;
if (TrimInterval != 0) {
// Enable eager node reclamation when constructing the ExplodedGraph.
G.enableNodeReclamation(TrimInterval);
}
}
//===----------------------------------------------------------------------===//
// Utility methods.
//===----------------------------------------------------------------------===//
ProgramStateRef ExprEngine::getInitialState(const LocationContext *InitLoc) {
ProgramStateRef state = StateMgr.getInitialState(InitLoc);
const Decl *D = InitLoc->getDecl();
// Preconditions.
// FIXME: It would be nice if we had a more general mechanism to add
// such preconditions. Some day.
do {
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
// Precondition: the first argument of 'main' is an integer guaranteed
// to be > 0.
const IdentifierInfo *II = FD->getIdentifier();
if (!II || !(II->getName() == "main" && FD->getNumParams() > 0))
break;
const ParmVarDecl *PD = FD->getParamDecl(0);
QualType T = PD->getType();
const auto *BT = dyn_cast<BuiltinType>(T);
if (!BT || !BT->isInteger())
break;
const MemRegion *R = state->getRegion(PD, InitLoc);
if (!R)
break;
SVal V = state->getSVal(loc::MemRegionVal(R));
SVal Constraint_untested = evalBinOp(state, BO_GT, V,
svalBuilder.makeZeroVal(T),
svalBuilder.getConditionType());
Optional<DefinedOrUnknownSVal> Constraint =
Constraint_untested.getAs<DefinedOrUnknownSVal>();
if (!Constraint)
break;
if (ProgramStateRef newState = state->assume(*Constraint, true))
state = newState;
}
break;
} while (false);
if (const auto *MD = dyn_cast<ObjCMethodDecl>(D)) {
// Precondition: 'self' is always non-null upon entry to an Objective-C
// method.
const ImplicitParamDecl *SelfD = MD->getSelfDecl();
const MemRegion *R = state->getRegion(SelfD, InitLoc);
SVal V = state->getSVal(loc::MemRegionVal(R));
if (Optional<Loc> LV = V.getAs<Loc>()) {
// Assume that the pointer value in 'self' is non-null.
state = state->assume(*LV, true);
assert(state && "'self' cannot be null");
}
}
if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
if (!MD->isStatic()) {
// Precondition: 'this' is always non-null upon entry to the
// top-level function. This is our starting assumption for
// analyzing an "open" program.
const StackFrameContext *SFC = InitLoc->getStackFrame();
if (SFC->getParent() == nullptr) {
loc::MemRegionVal L = svalBuilder.getCXXThis(MD, SFC);
SVal V = state->getSVal(L);
if (Optional<Loc> LV = V.getAs<Loc>()) {
state = state->assume(*LV, true);
assert(state && "'this' cannot be null");
}
}
}
}
return state;
}
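// For illustration, the 'main' precondition established above: the initial
// state constrains argc > 0, so the analyzer treats the branch below as
// infeasible and prunes paths through it.
//
//   int main(int argc, char **argv) {
//     if (argc == 0)
//       return 1;  // not explored
//     return 0;
//   }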
ProgramStateRef ExprEngine::createTemporaryRegionIfNeeded(
ProgramStateRef State, const LocationContext *LC,
const Expr *InitWithAdjustments, const Expr *Result,
const SubRegion **OutRegionWithAdjustments) {
// FIXME: This function is a hack that works around the quirky AST
// we often get for C++ temporaries. If only we modelled
// the actual execution order of statements properly in the CFG,
// all the hassle with adjustments would not be necessary,
// and perhaps the whole function would be removed.
SVal InitValWithAdjustments = State->getSVal(InitWithAdjustments, LC);
if (!Result) {
// If we don't have an explicit result expression, we're in "if needed"
// mode. Only create a region if the current value is a NonLoc.
if (!isa<NonLoc>(InitValWithAdjustments)) {
if (OutRegionWithAdjustments)
*OutRegionWithAdjustments = nullptr;
return State;
}
Result = InitWithAdjustments;
} else {
// We need to create a region no matter what. Make sure we don't try to
// stuff a Loc into a non-pointer temporary region.
assert(!isa<Loc>(InitValWithAdjustments) ||
Loc::isLocType(Result->getType()) ||
Result->getType()->isMemberPointerType());
}
ProgramStateManager &StateMgr = State->getStateManager();
MemRegionManager &MRMgr = StateMgr.getRegionManager();
StoreManager &StoreMgr = StateMgr.getStoreManager();
// MaterializeTemporaryExpr may appear out of place, after a few field and
// base-class accesses have been made to the object, even though semantically
// it is the whole object that gets materialized and lifetime-extended.
//
// For example:
//
// `-MaterializeTemporaryExpr
// `-MemberExpr
// `-CXXTemporaryObjectExpr
//
// instead of the more natural
//
// `-MemberExpr
// `-MaterializeTemporaryExpr
// `-CXXTemporaryObjectExpr
//
// Use the usual methods for obtaining the expression of the base object,
// and record the adjustments that we need to make to obtain the sub-object
// that the whole expression 'Ex' refers to. This approach is standard,
// in the sense that CodeGen takes a similar route.
SmallVector<const Expr *, 2> CommaLHSs;
SmallVector<SubobjectAdjustment, 2> Adjustments;
const Expr *Init = InitWithAdjustments->skipRValueSubobjectAdjustments(
CommaLHSs, Adjustments);
// Take the region for Init, i.e. for the whole object. If we do not remember
// the region in which the object originally was constructed, come up with
// a new temporary region out of thin air and copy the contents of the object
// (which are currently present in the Environment, because Init is an rvalue)
// into that region. This is not correct, but it is better than nothing.
const TypedValueRegion *TR = nullptr;
if (const auto *MT = dyn_cast<MaterializeTemporaryExpr>(Result)) {
if (Optional<SVal> V = getObjectUnderConstruction(State, MT, LC)) {
State = finishObjectConstruction(State, MT, LC);
State = State->BindExpr(Result, LC, *V);
return State;
} else {
StorageDuration SD = MT->getStorageDuration();
// If this object is bound to a reference with static storage duration, we
// put it in a different region to prevent "address leakage" warnings.
if (SD == SD_Static || SD == SD_Thread) {
TR = MRMgr.getCXXStaticTempObjectRegion(Init);
} else {
TR = MRMgr.getCXXTempObjectRegion(Init, LC);
}
}
} else {
TR = MRMgr.getCXXTempObjectRegion(Init, LC);
}
SVal Reg = loc::MemRegionVal(TR);
SVal BaseReg = Reg;
// Make the necessary adjustments to obtain the sub-object.
for (const SubobjectAdjustment &Adj : llvm::reverse(Adjustments)) {
switch (Adj.Kind) {
case SubobjectAdjustment::DerivedToBaseAdjustment:
Reg = StoreMgr.evalDerivedToBase(Reg, Adj.DerivedToBase.BasePath);
break;
case SubobjectAdjustment::FieldAdjustment:
Reg = StoreMgr.getLValueField(Adj.Field, Reg);
break;
case SubobjectAdjustment::MemberPointerAdjustment:
// FIXME: Unimplemented.
State = State->invalidateRegions(Reg, InitWithAdjustments,
currBldrCtx->blockCount(), LC, true,
nullptr, nullptr, nullptr);
return State;
}
}
// What remains is to copy the value of the object to the new region.
// FIXME: In other words, what we should always do is copy value of the
// Init expression (which corresponds to the bigger object) to the whole
// temporary region TR. However, this value is often no longer present
// in the Environment. If it has disappeared, we instead invalidate TR.
// Still, what we can do is assign the value of expression Ex (which
// corresponds to the sub-object) to the TR's sub-region Reg. At least,
// values inside Reg would be correct.
SVal InitVal = State->getSVal(Init, LC);
if (InitVal.isUnknown()) {
InitVal = getSValBuilder().conjureSymbolVal(Result, LC, Init->getType(),
currBldrCtx->blockCount());
State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
// Then we'd need to take the value that certainly exists and bind it
// over.
if (InitValWithAdjustments.isUnknown()) {
// Try to recover some path sensitivity in case we couldn't
// compute the value.
InitValWithAdjustments = getSValBuilder().conjureSymbolVal(
Result, LC, InitWithAdjustments->getType(),
currBldrCtx->blockCount());
}
State =
State->bindLoc(Reg.castAs<Loc>(), InitValWithAdjustments, LC, false);
} else {
State = State->bindLoc(BaseReg.castAs<Loc>(), InitVal, LC, false);
}
// The result expression would now point to the correct sub-region of the
// newly created temporary region. Do this last in order to getSVal of Init
// correctly in case (Result == Init).
if (Result->isGLValue()) {
State = State->BindExpr(Result, LC, Reg);
} else {
State = State->BindExpr(Result, LC, InitValWithAdjustments);
}
// Notify checkers once for two bindLoc()s.
State = processRegionChange(State, TR, LC);
if (OutRegionWithAdjustments)
*OutRegionWithAdjustments = cast<SubRegion>(Reg.getAsRegion());
return State;
}
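// For illustration, the out-of-place MaterializeTemporaryExpr described
// above: the adjustment walk recovers the sub-object from the region of the
// whole lifetime-extended temporary.
//
//   struct Pair { int first, second; };
//   Pair make();
//   const int &r = make().second;  // extends the whole Pair temporary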
ProgramStateRef ExprEngine::setIndexOfElementToConstruct(
ProgramStateRef State, const CXXConstructExpr *E,
const LocationContext *LCtx, unsigned Idx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(!State->contains<IndexOfElementToConstruct>(Key) || Idx > 0);
return State->set<IndexOfElementToConstruct>(Key, Idx);
}
Optional<unsigned> ExprEngine::getPendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
return Optional<unsigned>::create(
State->get<PendingInitLoop>({E, LCtx->getStackFrame()}));
}
ProgramStateRef ExprEngine::removePendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(E && State->contains<PendingInitLoop>(Key));
return State->remove<PendingInitLoop>(Key);
}
ProgramStateRef ExprEngine::setPendingInitLoop(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx,
unsigned Size) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(!State->contains<PendingInitLoop>(Key) && Size > 0);
return State->set<PendingInitLoop>(Key, Size);
}
Optional<unsigned>
ExprEngine::getIndexOfElementToConstruct(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
return Optional<unsigned>::create(
State->get<IndexOfElementToConstruct>({E, LCtx->getStackFrame()}));
}
ProgramStateRef
ExprEngine::removeIndexOfElementToConstruct(ProgramStateRef State,
const CXXConstructExpr *E,
const LocationContext *LCtx) {
auto Key = std::make_pair(E, LCtx->getStackFrame());
assert(E && State->contains<IndexOfElementToConstruct>(Key));
return State->remove<IndexOfElementToConstruct>(Key);
}
ProgramStateRef
ExprEngine::addObjectUnderConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC, SVal V) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
const Expr *Init = nullptr;
if (auto DS = dyn_cast_or_null<DeclStmt>(Item.getStmtOrNull())) {
if (auto VD = dyn_cast_or_null<VarDecl>(DS->getSingleDecl()))
Init = VD->getInit();
}
if (auto LE = dyn_cast_or_null<LambdaExpr>(Item.getStmtOrNull()))
Init = *(LE->capture_init_begin() + Item.getIndex());
if (!Init && !Item.getStmtOrNull())
Init = Item.getCXXCtorInitializer()->getInit();
// In an ArrayInitLoopExpr the real initializer is returned by
// getSubExpr().
if (const auto *AILE = dyn_cast_or_null<ArrayInitLoopExpr>(Init))
Init = AILE->getSubExpr();
// FIXME: Currently the state might already contain the marker due to
// incorrect handling of temporaries bound to default parameters.
// The state will already contain the marker if we construct elements
// in an array, as we visit the same statement multiple times before
// the array declaration. The marker is removed when we exit the
// constructor call.
assert((!State->get<ObjectsUnderConstruction>(Key) ||
Key.getItem().getKind() ==
ConstructionContextItem::TemporaryDestructorKind ||
State->contains<IndexOfElementToConstruct>(
{dyn_cast_or_null<CXXConstructExpr>(Init), LC})) &&
"The object is already marked as `UnderConstruction`, when it's not "
"supposed to!");
return State->set<ObjectsUnderConstruction>(Key, V);
}
Optional<SVal>
ExprEngine::getObjectUnderConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
return Optional<SVal>::create(State->get<ObjectsUnderConstruction>(Key));
}
ProgramStateRef
ExprEngine::finishObjectConstruction(ProgramStateRef State,
const ConstructionContextItem &Item,
const LocationContext *LC) {
ConstructedObjectKey Key(Item, LC->getStackFrame());
assert(State->contains<ObjectsUnderConstruction>(Key));
return State->remove<ObjectsUnderConstruction>(Key);
}
ProgramStateRef ExprEngine::elideDestructor(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
// FIXME: Currently the state might already contain the marker due to
// incorrect handling of temporaries bound to default parameters.
return State->set<ObjectsUnderConstruction>(Key, UnknownVal());
}
ProgramStateRef
ExprEngine::cleanupElidedDestructor(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
assert(State->contains<ObjectsUnderConstruction>(Key));
return State->remove<ObjectsUnderConstruction>(Key);
}
bool ExprEngine::isDestructorElided(ProgramStateRef State,
const CXXBindTemporaryExpr *BTE,
const LocationContext *LC) {
ConstructedObjectKey Key({BTE, /*IsElided=*/true}, LC);
return State->contains<ObjectsUnderConstruction>(Key);
}
bool ExprEngine::areAllObjectsFullyConstructed(ProgramStateRef State,
const LocationContext *FromLC,
const LocationContext *ToLC) {
const LocationContext *LC = FromLC;
while (LC != ToLC) {
assert(LC && "ToLC must be a parent of FromLC!");
for (auto I : State->get<ObjectsUnderConstruction>())
if (I.first.getLocationContext() == LC)
return false;
LC = LC->getParent();
}
return true;
}
//===----------------------------------------------------------------------===//
// Top-level transfer function logic (Dispatcher).
//===----------------------------------------------------------------------===//
/// evalAssume - Called by ConstraintManager. Used to call checker-specific
/// logic for handling assumptions on symbolic values.
ProgramStateRef ExprEngine::processAssume(ProgramStateRef state,
SVal cond, bool assumption) {
return getCheckerManager().runCheckersForEvalAssume(state, cond, assumption);
}
ProgramStateRef
ExprEngine::processRegionChanges(ProgramStateRef state,
const InvalidatedSymbols *invalidated,
ArrayRef<const MemRegion *> Explicits,
ArrayRef<const MemRegion *> Regions,
const LocationContext *LCtx,
const CallEvent *Call) {
return getCheckerManager().runCheckersForRegionChanges(state, invalidated,
Explicits, Regions,
LCtx, Call);
}
static void
printObjectsUnderConstructionJson(raw_ostream &Out, ProgramStateRef State,
const char *NL, const LocationContext *LCtx,
unsigned int Space = 0, bool IsDot = false) {
PrintingPolicy PP =
LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
const ConstructedObjectKey *LastKey = nullptr;
for (const auto &I : State->get<ObjectsUnderConstruction>()) {
const ConstructedObjectKey &Key = I.first;
if (Key.getLocationContext() != LCtx)
continue;
if (!HasItem) {
Out << "[" << NL;
HasItem = true;
}
LastKey = &Key;
}
for (const auto &I : State->get<ObjectsUnderConstruction>()) {
const ConstructedObjectKey &Key = I.first;
SVal Value = I.second;
if (Key.getLocationContext() != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
Key.printJson(Out, nullptr, PP);
Out << ", \"value\": \"" << Value << "\" }";
if (&Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
static void printIndicesOfElementsToConstructJson(
raw_ostream &Out, ProgramStateRef State, const char *NL,
const LocationContext *LCtx, const ASTContext &Context,
unsigned int Space = 0, bool IsDot = false) {
using KeyT = std::pair<const Expr *, const LocationContext *>;
PrintingPolicy PP =
LCtx->getAnalysisDeclContext()->getASTContext().getPrintingPolicy();
++Space;
bool HasItem = false;
// Store the last key.
KeyT LastKey;
for (const auto &I : State->get<IndexOfElementToConstruct>()) {
const KeyT &Key = I.first;
if (Key.second != LCtx)
continue;
if (!HasItem) {
Out << "[" << NL;
HasItem = true;
}
LastKey = Key;
}
for (const auto &I : State->get<IndexOfElementToConstruct>()) {
const KeyT &Key = I.first;
unsigned Value = I.second;
if (Key.second != LCtx)
continue;
Indent(Out, Space, IsDot) << "{ ";
// Expr
const Expr *E = Key.first;
Out << "\"stmt_id\": " << E->getID(Context);
// Kind - hack to display the current index
Out << ", \"kind\": \"Cur: " << Value - 1 << "\"";
// Pretty-print
Out << ", \"pretty\": ";
Out << "\"" << E->getStmtClassName() << " "
<< E->getSourceRange().printToString(Context.getSourceManager()) << " '"
<< QualType::getAsString(E->getType().split(), PP);
Out << "'\"";
Out << ", \"value\": \"Next: " << Value << "\" }";
if (Key != LastKey)
Out << ',';
Out << NL;
}
if (HasItem)
Indent(Out, --Space, IsDot) << ']'; // End of "location_context".
else {
Out << "null ";
}
}
void ExprEngine::printJson(raw_ostream &Out, ProgramStateRef State,
const LocationContext *LCtx, const char *NL,
unsigned int Space, bool IsDot) const {
Indent(Out, Space, IsDot) << "\"constructing_objects\": ";
if (LCtx && !State->get<ObjectsUnderConstruction>().isEmpty()) {
++Space;
Out << '[' << NL;
LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
printObjectsUnderConstructionJson(Out, State, NL, LC, Space, IsDot);
});
--Space;
Indent(Out, Space, IsDot) << "]," << NL; // End of "constructing_objects".
} else {
Out << "null," << NL;
}
Indent(Out, Space, IsDot) << "\"index_of_element\": ";
if (LCtx && !State->get<IndexOfElementToConstruct>().isEmpty()) {
++Space;
auto &Context = getContext();
Out << '[' << NL;
LCtx->printJson(Out, NL, Space, IsDot, [&](const LocationContext *LC) {
printIndicesOfElementsToConstructJson(Out, State, NL, LC, Context, Space,
IsDot);
});
--Space;
Indent(Out, Space, IsDot) << "]," << NL; // End of "index_of_element".
} else {
Out << "null," << NL;
}
getCheckerManager().runCheckersForPrintStateJson(Out, State, NL, Space,
IsDot);
}
void ExprEngine::processEndWorklist() {
// This prints the name of the top-level function if we crash.
PrettyStackTraceLocationContext CrashInfo(getRootLocationContext());
getCheckerManager().runCheckersForEndAnalysis(G, BR, *this);
}
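/// Dispatch a single CFG element to the appropriate handler: statements and
/// constructors, constructor initializers, new-allocators, implicit
/// destructors, and loop exits each have their own Process* method; lifetime
/// and scope markers are currently ignored.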
void ExprEngine::processCFGElement(const CFGElement E, ExplodedNode *Pred,
unsigned StmtIdx, NodeBuilderContext *Ctx) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
currStmtIdx = StmtIdx;
currBldrCtx = Ctx;
switch (E.getKind()) {
case CFGElement::Statement:
case CFGElement::Constructor:
case CFGElement::CXXRecordTypedCall:
ProcessStmt(E.castAs<CFGStmt>().getStmt(), Pred);
return;
case CFGElement::Initializer:
ProcessInitializer(E.castAs<CFGInitializer>(), Pred);
return;
case CFGElement::NewAllocator:
ProcessNewAllocator(E.castAs<CFGNewAllocator>().getAllocatorExpr(),
Pred);
return;
case CFGElement::AutomaticObjectDtor:
case CFGElement::DeleteDtor:
case CFGElement::BaseDtor:
case CFGElement::MemberDtor:
case CFGElement::TemporaryDtor:
ProcessImplicitDtor(E.castAs<CFGImplicitDtor>(), Pred);
return;
case CFGElement::LoopExit:
ProcessLoopExit(E.castAs<CFGLoopExit>().getLoopStmt(), Pred);
return;
case CFGElement::LifetimeEnds:
case CFGElement::ScopeBegin:
case CFGElement::ScopeEnd:
return;
}
}
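/// Decide whether it is worth purging dead bindings before processing 'S'.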
static bool shouldRemoveDeadBindings(AnalysisManager &AMgr,
const Stmt *S,
const ExplodedNode *Pred,
const LocationContext *LC) {
// Are we never purging state values?
if (AMgr.options.AnalysisPurgeOpt == PurgeNone)
return false;
// Is this the beginning of a basic block?
if (Pred->getLocation().getAs<BlockEntrance>())
return true;
// Is this on a non-expression?
if (!isa<Expr>(S))
return true;
// Run before processing a call.
if (CallEvent::isCallStmt(S))
return true;
// Is this an expression that is consumed by another expression? If so,
// postpone cleaning out the state.
ParentMap &PM = LC->getAnalysisDeclContext()->getParentMap();
return !PM.isConsumedExpr(cast<Expr>(S));
}
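/// Remove dead bindings and symbols from the state. Objects that are still
/// under construction are explicitly kept alive, checkers get a chance to
/// react to dying symbols via checkDeadSymbols, and the resulting cleaned-up
/// states are emitted into 'Out'.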
void ExprEngine::removeDead(ExplodedNode *Pred, ExplodedNodeSet &Out,
const Stmt *ReferenceStmt,
const LocationContext *LC,
const Stmt *DiagnosticStmt,
ProgramPoint::Kind K) {
assert((K == ProgramPoint::PreStmtPurgeDeadSymbolsKind ||
ReferenceStmt == nullptr || isa<ReturnStmt>(ReferenceStmt))
&& "PostStmt is not generally supported by the SymbolReaper yet");
assert(LC && "Must pass the current (or expiring) LocationContext");
if (!DiagnosticStmt) {
DiagnosticStmt = ReferenceStmt;
assert(DiagnosticStmt && "Required for clearing a LocationContext");
}
NumRemoveDeadBindings++;
ProgramStateRef CleanedState = Pred->getState();
// LC is the location context being destroyed, but SymbolReaper wants a
// location context that is still live. (If this is the top-level stack
// frame, this will be null.)
if (!ReferenceStmt) {
assert(K == ProgramPoint::PostStmtPurgeDeadSymbolsKind &&
"Use PostStmtPurgeDeadSymbolsKind for clearing a LocationContext");
LC = LC->getParent();
}
const StackFrameContext *SFC = LC ? LC->getStackFrame() : nullptr;
SymbolReaper SymReaper(SFC, ReferenceStmt, SymMgr, getStoreManager());
for (auto I : CleanedState->get<ObjectsUnderConstruction>()) {
if (SymbolRef Sym = I.second.getAsSymbol())
SymReaper.markLive(Sym);
if (const MemRegion *MR = I.second.getAsRegion())
SymReaper.markLive(MR);
}
getCheckerManager().runCheckersForLiveSymbols(CleanedState, SymReaper);
// Create a state in which dead bindings are removed from the environment
// and the store. TODO: The function should just return new env and store,
// not a new state.
CleanedState = StateMgr.removeDeadBindingsFromEnvironmentAndStore(
CleanedState, SFC, SymReaper);
// Process any special transfer function for dead symbols.
// A tag to track convenience transitions, which can be removed at cleanup.
static SimpleProgramPointTag cleanupTag(TagProviderName, "Clean Node");
// Call checkers with the non-cleaned state so that they can query the
// values of the soon-to-be-dead symbols.
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForDeadSymbols(CheckedSet, Pred, SymReaper,
DiagnosticStmt, *this, K);
// For each node in CheckedSet, generate CleanedNodes that have the
// environment, the store, and the constraints cleaned up but have the
// user-supplied states as the predecessors.
StmtNodeBuilder Bldr(CheckedSet, Out, *currBldrCtx);
for (const auto I : CheckedSet) {
ProgramStateRef CheckerState = I->getState();
// The constraint manager has not been cleaned up yet, so clean up now.
CheckerState =
getConstraintManager().removeDeadBindings(CheckerState, SymReaper);
assert(StateMgr.haveEqualEnvironments(CheckerState, Pred->getState()) &&
"Checkers are not allowed to modify the Environment as a part of "
"checkDeadSymbols processing.");
assert(StateMgr.haveEqualStores(CheckerState, Pred->getState()) &&
"Checkers are not allowed to modify the Store as a part of "
"checkDeadSymbols processing.");
// Create a state based on CleanedState with CheckerState GDM and
// generate a transition to that state.
ProgramStateRef CleanedCheckerSt =
StateMgr.getPersistentStateWithGDM(CleanedState, CheckerState);
Bldr.generateNode(DiagnosticStmt, I, CleanedCheckerSt, &cleanupTag, K);
}
}
void ExprEngine::ProcessStmt(const Stmt *currStmt, ExplodedNode *Pred) {
// Reclaim any unnecessary nodes in the ExplodedGraph.
G.reclaimRecentlyAllocatedNodes();
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
currStmt->getBeginLoc(),
"Error evaluating statement");
// Remove dead bindings and symbols.
ExplodedNodeSet CleanedStates;
if (shouldRemoveDeadBindings(AMgr, currStmt, Pred,
Pred->getLocationContext())) {
removeDead(Pred, CleanedStates, currStmt,
Pred->getLocationContext());
} else
CleanedStates.Add(Pred);
// Visit the statement.
ExplodedNodeSet Dst;
for (const auto I : CleanedStates) {
ExplodedNodeSet DstI;
// Visit the statement.
Visit(currStmt, I, DstI);
Dst.insert(DstI);
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
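/// Process a LoopExit CFG element: if loop unrolling is enabled, let the
/// unrolling machinery clean up its per-loop state, then generate a LoopExit
/// program point.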
void ExprEngine::ProcessLoopExit(const Stmt* S, ExplodedNode *Pred) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
S->getBeginLoc(),
"Error evaluating end of the loop");
ExplodedNodeSet Dst;
Dst.Add(Pred);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
ProgramStateRef NewState = Pred->getState();
if(AMgr.options.ShouldUnrollLoops)
NewState = processLoopEnd(S, NewState);
LoopExit PP(S, Pred->getLocationContext());
Bldr.generateNode(PP, NewState, Pred);
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
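/// Process a constructor initializer. Member initializers either reuse the
/// object that was already constructed directly into the field or bind the
/// value of the init expression into the field's region; base and delegating
/// initializers were already handled when the CXXConstructExpr was visited.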
void ExprEngine::ProcessInitializer(const CFGInitializer CFGInit,
ExplodedNode *Pred) {
const CXXCtorInitializer *BMI = CFGInit.getInitializer();
const Expr *Init = BMI->getInit()->IgnoreImplicit();
const LocationContext *LC = Pred->getLocationContext();
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
BMI->getSourceLocation(),
"Error evaluating initializer");
// We don't clean up dead bindings here.
const auto *stackFrame = cast<StackFrameContext>(Pred->getLocationContext());
const auto *decl = cast<CXXConstructorDecl>(stackFrame->getDecl());
ProgramStateRef State = Pred->getState();
SVal thisVal = State->getSVal(svalBuilder.getCXXThis(decl, stackFrame));
ExplodedNodeSet Tmp;
SVal FieldLoc;
// Evaluate the initializer, if necessary
if (BMI->isAnyMemberInitializer()) {
// Constructors build the object directly in the field,
// but non-objects must be copied in from the initializer.
if (getObjectUnderConstruction(State, BMI, LC)) {
// The field was directly constructed, so there is no need to bind.
// But we still need to stop tracking the object under construction.
State = finishObjectConstruction(State, BMI, LC);
NodeBuilder Bldr(Pred, Tmp, *currBldrCtx);
PostStore PS(Init, LC, /*Loc*/ nullptr, /*tag*/ nullptr);
Bldr.generateNode(PS, State, Pred);
} else {
const ValueDecl *Field;
if (BMI->isIndirectMemberInitializer()) {
Field = BMI->getIndirectMember();
FieldLoc = State->getLValue(BMI->getIndirectMember(), thisVal);
} else {
Field = BMI->getMember();
FieldLoc = State->getLValue(BMI->getMember(), thisVal);
}
SVal InitVal;
if (Init->getType()->isArrayType()) {
// Handle arrays of trivial type. We can represent this with a
// primitive load/copy from the base array region.
const ArraySubscriptExpr *ASE;
while ((ASE = dyn_cast<ArraySubscriptExpr>(Init)))
Init = ASE->getBase()->IgnoreImplicit();
SVal LValue = State->getSVal(Init, stackFrame);
if (!Field->getType()->isReferenceType())
if (Optional<Loc> LValueLoc = LValue.getAs<Loc>())
InitVal = State->getSVal(*LValueLoc);
// If we fail to get the value for some reason, use a symbolic value.
if (InitVal.isUnknownOrUndef()) {
SValBuilder &SVB = getSValBuilder();
InitVal = SVB.conjureSymbolVal(BMI->getInit(), stackFrame,
Field->getType(),
currBldrCtx->blockCount());
}
} else {
InitVal = State->getSVal(BMI->getInit(), stackFrame);
}
PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
evalBind(Tmp, Init, Pred, FieldLoc, InitVal, /*isInit=*/true, &PP);
}
} else {
assert(BMI->isBaseInitializer() || BMI->isDelegatingInitializer());
Tmp.insert(Pred);
// We already did all the work when visiting the CXXConstructExpr.
}
// Construct PostInitializer nodes whether the state changed or not,
// so that the diagnostics don't get confused.
PostInitializer PP(BMI, FieldLoc.getAsRegion(), stackFrame);
ExplodedNodeSet Dst;
NodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
for (const auto I : Tmp) {
ProgramStateRef State = I->getState();
Bldr.generateNode(PP, State, I);
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessImplicitDtor(const CFGImplicitDtor D,
ExplodedNode *Pred) {
ExplodedNodeSet Dst;
switch (D.getKind()) {
case CFGElement::AutomaticObjectDtor:
ProcessAutomaticObjDtor(D.castAs<CFGAutomaticObjDtor>(), Pred, Dst);
break;
case CFGElement::BaseDtor:
ProcessBaseDtor(D.castAs<CFGBaseDtor>(), Pred, Dst);
break;
case CFGElement::MemberDtor:
ProcessMemberDtor(D.castAs<CFGMemberDtor>(), Pred, Dst);
break;
case CFGElement::TemporaryDtor:
ProcessTemporaryDtor(D.castAs<CFGTemporaryDtor>(), Pred, Dst);
break;
case CFGElement::DeleteDtor:
ProcessDeleteDtor(D.castAs<CFGDeleteDtor>(), Pred, Dst);
break;
default:
llvm_unreachable("Unexpected dtor kind.");
}
// Enqueue the new nodes onto the work list.
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
void ExprEngine::ProcessNewAllocator(const CXXNewExpr *NE,
ExplodedNode *Pred) {
ExplodedNodeSet Dst;
AnalysisManager &AMgr = getAnalysisManager();
AnalyzerOptions &Opts = AMgr.options;
// TODO: We're not evaluating allocators for all cases just yet as
// we're not handling the return value correctly, which causes false
// positives when the alpha.cplusplus.NewDeleteLeaks check is on.
if (Opts.MayInlineCXXAllocator)
VisitCXXNewAllocatorCall(NE, Pred, Dst);
else {
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
const LocationContext *LCtx = Pred->getLocationContext();
PostImplicitCall PP(NE->getOperatorNew(), NE->getBeginLoc(), LCtx);
Bldr.generateNode(PP, Pred->getState(), Pred);
}
Engine.enqueue(Dst, currBldrCtx->getBlock(), currStmtIdx);
}
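/// Process the implicit destructor call for a local variable going out of
/// scope, e.g. the destructor of 's' at the closing brace of
///   { S s; /* ... */ }
/// For arrays, only the first element's destructor is currently modelled.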
void ExprEngine::ProcessAutomaticObjDtor(const CFGAutomaticObjDtor Dtor,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
const VarDecl *varDecl = Dtor.getVarDecl();
QualType varType = varDecl->getType();
ProgramStateRef state = Pred->getState();
SVal dest = state->getLValue(varDecl, Pred->getLocationContext());
const MemRegion *Region = dest.castAs<loc::MemRegionVal>().getRegion();
if (varType->isReferenceType()) {
const MemRegion *ValueRegion = state->getSVal(Region).getAsRegion();
if (!ValueRegion) {
// FIXME: This should not happen. The language guarantees the presence
// of a valid initializer here, so the reference cannot be undefined.
// It seems that we're calling destructors over variables that
// were not initialized yet.
return;
}
Region = ValueRegion->getBaseRegion();
varType = cast<TypedValueRegion>(Region)->getValueType();
}
// FIXME: We need to run the same destructor on every element of the array.
// This workaround will just run the first destructor (which will still
// invalidate the entire array).
EvalCallOptions CallOpts;
Region = makeElementRegion(state, loc::MemRegionVal(Region), varType,
CallOpts.IsArrayCtorOrDtor)
.getAsRegion();
VisitCXXDestructor(varType, Region, Dtor.getTriggerStmt(),
/*IsBase=*/false, Pred, Dst, CallOpts);
}
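/// Process the destructor call implied by a 'delete' or 'delete[]'
/// expression. If the operand is known to be null, the destructor is not
/// modelled at all.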
void ExprEngine::ProcessDeleteDtor(const CFGDeleteDtor Dtor,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
const CXXDeleteExpr *DE = Dtor.getDeleteExpr();
const Stmt *Arg = DE->getArgument();
QualType DTy = DE->getDestroyedType();
SVal ArgVal = State->getSVal(Arg, LCtx);
// If the argument to delete is known to be a null value,
// don't run the destructor.
if (State->isNull(ArgVal).isConstrainedTrue()) {
QualType BTy = getContext().getBaseElementType(DTy);
const CXXRecordDecl *RD = BTy->getAsCXXRecordDecl();
const CXXDestructorDecl *Dtor = RD->getDestructor();
PostImplicitCall PP(Dtor, DE->getBeginLoc(), LCtx);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(PP, Pred->getState(), Pred);
return;
}
EvalCallOptions CallOpts;
const MemRegion *ArgR = ArgVal.getAsRegion();
if (DE->isArrayForm()) {
// FIXME: We need to run the same destructor on every element of the array.
// This workaround will just run the first destructor (which will still
// invalidate the entire array).
CallOpts.IsArrayCtorOrDtor = true;
// Yes, it may even be a multi-dimensional array.
while (const auto *AT = getContext().getAsArrayType(DTy))
DTy = AT->getElementType();
if (ArgR)
ArgR = getStoreManager().GetElementZeroRegion(cast<SubRegion>(ArgR), DTy);
}
VisitCXXDestructor(DTy, ArgR, DE, /*IsBase=*/false, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessBaseDtor(const CFGBaseDtor D,
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
const LocationContext *LCtx = Pred->getLocationContext();
const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
Loc ThisPtr = getSValBuilder().getCXXThis(CurDtor,
LCtx->getStackFrame());
SVal ThisVal = Pred->getState()->getSVal(ThisPtr);
// Create the base object region.
const CXXBaseSpecifier *Base = D.getBaseSpecifier();
QualType BaseTy = Base->getType();
SVal BaseVal = getStoreManager().evalDerivedToBase(ThisVal, BaseTy,
Base->isVirtual());
EvalCallOptions CallOpts;
VisitCXXDestructor(BaseTy, BaseVal.getAsRegion(), CurDtor->getBody(),
/*IsBase=*/true, Pred, Dst, CallOpts);
}
void ExprEngine::ProcessMemberDtor(const CFGMemberDtor D,
ExplodedNode *Pred, ExplodedNodeSet &Dst) {
const FieldDecl *Member = D.getFieldDecl();
QualType T = Member->getType();
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
const auto *CurDtor = cast<CXXDestructorDecl>(LCtx->getDecl());
Loc ThisStorageLoc =
getSValBuilder().getCXXThis(CurDtor, LCtx->getStackFrame());
Loc ThisLoc = State->getSVal(ThisStorageLoc).castAs<Loc>();
SVal FieldVal = State->getLValue(Member, ThisLoc);
// FIXME: We need to run the same destructor on every element of the array.
// This workaround will just run the first destructor (which will still
// invalidate the entire array).
EvalCallOptions CallOpts;
FieldVal = makeElementRegion(State, FieldVal, T, CallOpts.IsArrayCtorOrDtor);
VisitCXXDestructor(T, FieldVal.getAsRegion(), CurDtor->getBody(),
/*IsBase=*/false, Pred, Dst, CallOpts);
}
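/// Process the destructor of a temporary, i.e. a CXXBindTemporaryExpr whose
/// lifetime ends at the end of the full-expression. Destructors elided by
/// copy elision are skipped.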
void ExprEngine::ProcessTemporaryDtor(const CFGTemporaryDtor D,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
const CXXBindTemporaryExpr *BTE = D.getBindTemporaryExpr();
ProgramStateRef State = Pred->getState();
const LocationContext *LC = Pred->getLocationContext();
const MemRegion *MR = nullptr;
if (Optional<SVal> V =
getObjectUnderConstruction(State, D.getBindTemporaryExpr(),
Pred->getLocationContext())) {
// FIXME: Currently we insert temporary destructors for default parameters,
// but we don't insert the constructors, so the entry in
// ObjectsUnderConstruction may be missing.
State = finishObjectConstruction(State, D.getBindTemporaryExpr(),
Pred->getLocationContext());
MR = V->getAsRegion();
}
// If copy elision has occurred, and the constructor corresponding to the
// destructor was elided, we need to skip the destructor as well.
if (isDestructorElided(State, BTE, LC)) {
State = cleanupElidedDestructor(State, BTE, LC);
NodeBuilder Bldr(Pred, Dst, *currBldrCtx);
PostImplicitCall PP(D.getDestructorDecl(getContext()),
D.getBindTemporaryExpr()->getBeginLoc(),
Pred->getLocationContext());
Bldr.generateNode(PP, State, Pred);
return;
}
ExplodedNodeSet CleanDtorState;
StmtNodeBuilder StmtBldr(Pred, CleanDtorState, *currBldrCtx);
StmtBldr.generateNode(D.getBindTemporaryExpr(), Pred, State);
QualType T = D.getBindTemporaryExpr()->getSubExpr()->getType();
// FIXME: Currently CleanDtorState can be empty here due to temporaries being
// bound to default parameters.
assert(CleanDtorState.size() <= 1);
ExplodedNode *CleanPred =
CleanDtorState.empty() ? Pred : *CleanDtorState.begin();
EvalCallOptions CallOpts;
CallOpts.IsTemporaryCtorOrDtor = true;
if (!MR) {
// If we have no MR, we still need to unwrap the array to avoid destroying
// the whole array at once. Regardless, we'd eventually need to model array
// destructors properly, element-by-element.
while (const ArrayType *AT = getContext().getAsArrayType(T)) {
T = AT->getElementType();
CallOpts.IsArrayCtorOrDtor = true;
}
} else {
// We'd eventually need the makeElementRegion() trick here,
// but for now we don't have the respective construction contexts,
// so MR would always be null in this case. Do nothing for now.
}
VisitCXXDestructor(T, MR, D.getBindTemporaryExpr(),
/*IsBase=*/false, CleanPred, Dst, CallOpts);
}
void ExprEngine::processCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
NodeBuilderContext &BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
BranchNodeBuilder TempDtorBuilder(Pred, Dst, BldCtx, DstT, DstF);
ProgramStateRef State = Pred->getState();
const LocationContext *LC = Pred->getLocationContext();
if (getObjectUnderConstruction(State, BTE, LC)) {
TempDtorBuilder.markInfeasible(false);
TempDtorBuilder.generateNode(State, true, Pred);
} else {
TempDtorBuilder.markInfeasible(true);
TempDtorBuilder.generateNode(State, false, Pred);
}
}
void ExprEngine::VisitCXXBindTemporaryExpr(const CXXBindTemporaryExpr *BTE,
ExplodedNodeSet &PreVisit,
ExplodedNodeSet &Dst) {
// This is a fallback solution in case we didn't have a construction
// context when we were constructing the temporary. Otherwise the map should
// have been populated there.
if (!getAnalysisManager().options.ShouldIncludeTemporaryDtorsInCFG) {
// In case we don't have temporary destructors in the CFG, do not mark
// the initialization - we would otherwise never clean it up.
Dst = PreVisit;
return;
}
StmtNodeBuilder StmtBldr(PreVisit, Dst, *currBldrCtx);
for (ExplodedNode *Node : PreVisit) {
ProgramStateRef State = Node->getState();
const LocationContext *LC = Node->getLocationContext();
if (!getObjectUnderConstruction(State, BTE, LC)) {
// FIXME: Currently the state might also already contain the marker due to
// incorrect handling of temporaries bound to default parameters; for
// those, we currently skip the CXXBindTemporaryExpr but rely on adding
// temporary destructor nodes.
State = addObjectUnderConstruction(State, BTE, LC, UnknownVal());
}
StmtBldr.generateNode(BTE, Node, State);
}
}
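/// Notify checkers that all symbols reachable from the given values may have
/// escaped, i.e. may now be accessed and mutated by unknown code.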
ProgramStateRef ExprEngine::escapeValues(ProgramStateRef State,
ArrayRef<SVal> Vs,
PointerEscapeKind K,
const CallEvent *Call) const {
class CollectReachableSymbolsCallback final : public SymbolVisitor {
InvalidatedSymbols &Symbols;
public:
explicit CollectReachableSymbolsCallback(InvalidatedSymbols &Symbols)
: Symbols(Symbols) {}
const InvalidatedSymbols &getSymbols() const { return Symbols; }
bool VisitSymbol(SymbolRef Sym) override {
Symbols.insert(Sym);
return true;
}
};
InvalidatedSymbols Symbols;
CollectReachableSymbolsCallback CallBack(Symbols);
for (SVal V : Vs)
State->scanReachableSymbols(V, CallBack);
return getCheckerManager().runCheckersForPointerEscape(
State, CallBack.getSymbols(), Call, K, nullptr);
}
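/// Visit - The main per-statement dispatcher; applies the transfer function
/// for each kind of statement or expression the analyzer models.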
void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
ExplodedNodeSet &DstTop) {
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
S->getBeginLoc(), "Error evaluating statement");
ExplodedNodeSet Dst;
StmtNodeBuilder Bldr(Pred, DstTop, *currBldrCtx);
assert(!isa<Expr>(S) || S == cast<Expr>(S)->IgnoreParens());
switch (S->getStmtClass()) {
// C++, OpenMP and ARC stuff we don't support yet.
case Stmt::CXXDependentScopeMemberExprClass:
case Stmt::CXXTryStmtClass:
case Stmt::CXXTypeidExprClass:
case Stmt::CXXUuidofExprClass:
case Stmt::CXXFoldExprClass:
case Stmt::MSPropertyRefExprClass:
case Stmt::MSPropertySubscriptExprClass:
case Stmt::CXXUnresolvedConstructExprClass:
case Stmt::DependentScopeDeclRefExprClass:
case Stmt::ArrayTypeTraitExprClass:
case Stmt::ExpressionTraitExprClass:
case Stmt::UnresolvedLookupExprClass:
case Stmt::UnresolvedMemberExprClass:
case Stmt::TypoExprClass:
case Stmt::RecoveryExprClass:
case Stmt::CXXNoexceptExprClass:
case Stmt::PackExpansionExprClass:
case Stmt::SubstNonTypeTemplateParmPackExprClass:
case Stmt::FunctionParmPackExprClass:
case Stmt::CoroutineBodyStmtClass:
case Stmt::CoawaitExprClass:
case Stmt::DependentCoawaitExprClass:
case Stmt::CoreturnStmtClass:
case Stmt::CoyieldExprClass:
case Stmt::SEHTryStmtClass:
case Stmt::SEHExceptStmtClass:
case Stmt::SEHLeaveStmtClass:
case Stmt::SEHFinallyStmtClass:
case Stmt::OMPCanonicalLoopClass:
case Stmt::OMPParallelDirectiveClass:
case Stmt::OMPSimdDirectiveClass:
case Stmt::OMPForDirectiveClass:
case Stmt::OMPForSimdDirectiveClass:
case Stmt::OMPSectionsDirectiveClass:
case Stmt::OMPSectionDirectiveClass:
case Stmt::OMPSingleDirectiveClass:
case Stmt::OMPMasterDirectiveClass:
case Stmt::OMPCriticalDirectiveClass:
case Stmt::OMPParallelForDirectiveClass:
case Stmt::OMPParallelForSimdDirectiveClass:
case Stmt::OMPParallelSectionsDirectiveClass:
case Stmt::OMPParallelMasterDirectiveClass:
case Stmt::OMPParallelMaskedDirectiveClass:
case Stmt::OMPTaskDirectiveClass:
case Stmt::OMPTaskyieldDirectiveClass:
case Stmt::OMPBarrierDirectiveClass:
case Stmt::OMPTaskwaitDirectiveClass:
case Stmt::OMPTaskgroupDirectiveClass:
case Stmt::OMPFlushDirectiveClass:
case Stmt::OMPDepobjDirectiveClass:
case Stmt::OMPScanDirectiveClass:
case Stmt::OMPOrderedDirectiveClass:
case Stmt::OMPAtomicDirectiveClass:
case Stmt::OMPTargetDirectiveClass:
case Stmt::OMPTargetDataDirectiveClass:
case Stmt::OMPTargetEnterDataDirectiveClass:
case Stmt::OMPTargetExitDataDirectiveClass:
case Stmt::OMPTargetParallelDirectiveClass:
case Stmt::OMPTargetParallelForDirectiveClass:
case Stmt::OMPTargetUpdateDirectiveClass:
case Stmt::OMPTeamsDirectiveClass:
case Stmt::OMPCancellationPointDirectiveClass:
case Stmt::OMPCancelDirectiveClass:
case Stmt::OMPTaskLoopDirectiveClass:
case Stmt::OMPTaskLoopSimdDirectiveClass:
case Stmt::OMPMasterTaskLoopDirectiveClass:
case Stmt::OMPMaskedTaskLoopDirectiveClass:
case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
case Stmt::OMPMaskedTaskLoopSimdDirectiveClass:
case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
case Stmt::OMPParallelMaskedTaskLoopDirectiveClass:
case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
case Stmt::OMPParallelMaskedTaskLoopSimdDirectiveClass:
case Stmt::OMPDistributeDirectiveClass:
case Stmt::OMPDistributeParallelForDirectiveClass:
case Stmt::OMPDistributeParallelForSimdDirectiveClass:
case Stmt::OMPDistributeSimdDirectiveClass:
case Stmt::OMPTargetParallelForSimdDirectiveClass:
case Stmt::OMPTargetSimdDirectiveClass:
case Stmt::OMPTeamsDistributeDirectiveClass:
case Stmt::OMPTeamsDistributeSimdDirectiveClass:
case Stmt::OMPTeamsDistributeParallelForSimdDirectiveClass:
case Stmt::OMPTeamsDistributeParallelForDirectiveClass:
case Stmt::OMPTargetTeamsDirectiveClass:
case Stmt::OMPTargetTeamsDistributeDirectiveClass:
case Stmt::OMPTargetTeamsDistributeParallelForDirectiveClass:
case Stmt::OMPTargetTeamsDistributeParallelForSimdDirectiveClass:
case Stmt::OMPTargetTeamsDistributeSimdDirectiveClass:
case Stmt::OMPTileDirectiveClass:
case Stmt::OMPInteropDirectiveClass:
case Stmt::OMPDispatchDirectiveClass:
case Stmt::OMPMaskedDirectiveClass:
case Stmt::OMPGenericLoopDirectiveClass:
case Stmt::OMPTeamsGenericLoopDirectiveClass:
case Stmt::OMPTargetTeamsGenericLoopDirectiveClass:
case Stmt::OMPParallelGenericLoopDirectiveClass:
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
case Stmt::CapturedStmtClass:
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPMetaDirectiveClass: {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
break;
}
case Stmt::ParenExprClass:
llvm_unreachable("ParenExprs already handled.");
case Stmt::GenericSelectionExprClass:
llvm_unreachable("GenericSelectionExprs already handled.");
// Cases that should never be evaluated simply because they shouldn't
// appear in the CFG.
case Stmt::BreakStmtClass:
case Stmt::CaseStmtClass:
case Stmt::CompoundStmtClass:
case Stmt::ContinueStmtClass:
case Stmt::CXXForRangeStmtClass:
case Stmt::DefaultStmtClass:
case Stmt::DoStmtClass:
case Stmt::ForStmtClass:
case Stmt::GotoStmtClass:
case Stmt::IfStmtClass:
case Stmt::IndirectGotoStmtClass:
case Stmt::LabelStmtClass:
case Stmt::NoStmtClass:
case Stmt::NullStmtClass:
case Stmt::SwitchStmtClass:
case Stmt::WhileStmtClass:
case Expr::MSDependentExistsStmtClass:
llvm_unreachable("Stmt should not be in analyzer evaluation loop");
case Stmt::ImplicitValueInitExprClass:
// These nodes are shared in the CFG and would cause caching out.
// Moreover, no additional evaluation is required for them; the
// analyzer can reconstruct these values from the AST.
llvm_unreachable("Should be pruned from CFG");
case Stmt::ObjCSubscriptRefExprClass:
case Stmt::ObjCPropertyRefExprClass:
llvm_unreachable("These are handled by PseudoObjectExpr");
case Stmt::GNUNullExprClass: {
// GNU __null is a pointer-width integer, not an actual pointer.
ProgramStateRef state = Pred->getState();
state = state->BindExpr(
S, Pred->getLocationContext(),
svalBuilder.makeIntValWithWidth(getContext().VoidPtrTy, 0));
Bldr.generateNode(S, Pred, state);
break;
}
case Stmt::ObjCAtSynchronizedStmtClass:
Bldr.takeNodes(Pred);
VisitObjCAtSynchronizedStmt(cast<ObjCAtSynchronizedStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Expr::ConstantExprClass:
case Stmt::ExprWithCleanupsClass:
// Handled due to fully linearised CFG.
break;
case Stmt::CXXBindTemporaryExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet Next;
VisitCXXBindTemporaryExpr(cast<CXXBindTemporaryExpr>(S), PreVisit, Next);
getCheckerManager().runCheckersForPostStmt(Dst, Next, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::ArrayInitLoopExprClass:
Bldr.takeNodes(Pred);
VisitArrayInitLoopExpr(cast<ArrayInitLoopExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
// Cases not handled yet, but which we will handle some day.
case Stmt::DesignatedInitExprClass:
case Stmt::DesignatedInitUpdateExprClass:
case Stmt::ArrayInitIndexExprClass:
case Stmt::ExtVectorElementExprClass:
case Stmt::ImaginaryLiteralClass:
case Stmt::ObjCAtCatchStmtClass:
case Stmt::ObjCAtFinallyStmtClass:
case Stmt::ObjCAtTryStmtClass:
case Stmt::ObjCAutoreleasePoolStmtClass:
case Stmt::ObjCEncodeExprClass:
case Stmt::ObjCIsaExprClass:
case Stmt::ObjCProtocolExprClass:
case Stmt::ObjCSelectorExprClass:
case Stmt::ParenListExprClass:
case Stmt::ShuffleVectorExprClass:
case Stmt::ConvertVectorExprClass:
case Stmt::VAArgExprClass:
case Stmt::CUDAKernelCallExprClass:
case Stmt::OpaqueValueExprClass:
case Stmt::AsTypeExprClass:
case Stmt::ConceptSpecializationExprClass:
case Stmt::CXXRewrittenBinaryOperatorClass:
case Stmt::RequiresExprClass:
// Fall through.
// Cases we intentionally don't evaluate, since they don't need
// to be explicitly evaluated.
case Stmt::PredefinedExprClass:
case Stmt::AddrLabelExprClass:
case Stmt::AttributedStmtClass:
case Stmt::IntegerLiteralClass:
case Stmt::FixedPointLiteralClass:
case Stmt::CharacterLiteralClass:
case Stmt::CXXScalarValueInitExprClass:
case Stmt::CXXBoolLiteralExprClass:
case Stmt::ObjCBoolLiteralExprClass:
case Stmt::ObjCAvailabilityCheckExprClass:
case Stmt::FloatingLiteralClass:
case Stmt::NoInitExprClass:
case Stmt::SizeOfPackExprClass:
case Stmt::StringLiteralClass:
case Stmt::SourceLocExprClass:
case Stmt::ObjCStringLiteralClass:
case Stmt::CXXPseudoDestructorExprClass:
case Stmt::SubstNonTypeTemplateParmExprClass:
case Stmt::CXXNullPtrLiteralExprClass:
case Stmt::OMPArraySectionExprClass:
case Stmt::OMPArrayShapingExprClass:
case Stmt::OMPIteratorExprClass:
case Stmt::SYCLUniqueStableNameExprClass:
case Stmt::TypeTraitExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet preVisit;
getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this);
getCheckerManager().runCheckersForPostStmt(Dst, preVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXDefaultArgExprClass:
case Stmt::CXXDefaultInitExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(PreVisit, Tmp, *currBldrCtx);
const Expr *ArgE;
if (const auto *DefE = dyn_cast<CXXDefaultArgExpr>(S))
ArgE = DefE->getExpr();
else if (const auto *DefE = dyn_cast<CXXDefaultInitExpr>(S))
ArgE = DefE->getExpr();
else
llvm_unreachable("unknown constant wrapper kind");
bool IsTemporary = false;
if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(ArgE)) {
ArgE = MTE->getSubExpr();
IsTemporary = true;
}
Optional<SVal> ConstantVal = svalBuilder.getConstantVal(ArgE);
if (!ConstantVal)
ConstantVal = UnknownVal();
const LocationContext *LCtx = Pred->getLocationContext();
for (const auto I : PreVisit) {
ProgramStateRef State = I->getState();
State = State->BindExpr(S, LCtx, *ConstantVal);
if (IsTemporary)
State = createTemporaryRegionIfNeeded(State, LCtx,
cast<Expr>(S),
cast<Expr>(S));
Bldr2.generateNode(S, I, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Bldr.addNodes(Dst);
break;
}
// Cases we evaluate as opaque expressions, conjuring a symbol.
case Stmt::CXXStdInitializerListExprClass:
case Expr::ObjCArrayLiteralClass:
case Expr::ObjCDictionaryLiteralClass:
case Expr::ObjCBoxedExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet preVisit;
getCheckerManager().runCheckersForPreStmt(preVisit, Pred, S, *this);
ExplodedNodeSet Tmp;
StmtNodeBuilder Bldr2(preVisit, Tmp, *currBldrCtx);
const auto *Ex = cast<Expr>(S);
QualType resultType = Ex->getType();
for (const auto N : preVisit) {
const LocationContext *LCtx = N->getLocationContext();
SVal result = svalBuilder.conjureSymbolVal(nullptr, Ex, LCtx,
resultType,
currBldrCtx->blockCount());
ProgramStateRef State = N->getState()->BindExpr(Ex, LCtx, result);
// Escape pointers passed into the list, unless it's an ObjC boxed
// expression which is not a boxable C structure.
if (!(isa<ObjCBoxedExpr>(Ex) &&
!cast<ObjCBoxedExpr>(Ex)->getSubExpr()
->getType()->isRecordType()))
for (auto Child : Ex->children()) {
assert(Child);
SVal Val = State->getSVal(Child, LCtx);
State = escapeValues(State, Val, PSK_EscapeOther);
}
Bldr2.generateNode(S, N, State);
}
getCheckerManager().runCheckersForPostStmt(Dst, Tmp, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::ArraySubscriptExprClass:
Bldr.takeNodes(Pred);
VisitArraySubscriptExpr(cast<ArraySubscriptExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MatrixSubscriptExprClass:
llvm_unreachable("Support for MatrixSubscriptExpr is not implemented.");
break;
case Stmt::GCCAsmStmtClass:
Bldr.takeNodes(Pred);
VisitGCCAsmStmt(cast<GCCAsmStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MSAsmStmtClass:
Bldr.takeNodes(Pred);
VisitMSAsmStmt(cast<MSAsmStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::BlockExprClass:
Bldr.takeNodes(Pred);
VisitBlockExpr(cast<BlockExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::LambdaExprClass:
if (AMgr.options.ShouldInlineLambdas) {
Bldr.takeNodes(Pred);
VisitLambdaExpr(cast<LambdaExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
} else {
const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
Engine.addAbortedBlock(node, currBldrCtx->getBlock());
}
break;
case Stmt::BinaryOperatorClass: {
const auto *B = cast<BinaryOperator>(S);
if (B->isLogicalOp()) {
Bldr.takeNodes(Pred);
VisitLogicalExpr(B, Pred, Dst);
Bldr.addNodes(Dst);
break;
}
else if (B->getOpcode() == BO_Comma) {
ProgramStateRef state = Pred->getState();
Bldr.generateNode(B, Pred,
state->BindExpr(B, Pred->getLocationContext(),
state->getSVal(B->getRHS(),
Pred->getLocationContext())));
break;
}
Bldr.takeNodes(Pred);
if (AMgr.options.ShouldEagerlyAssume &&
(B->isRelationalOp() || B->isEqualityOp())) {
ExplodedNodeSet Tmp;
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Tmp);
evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, cast<Expr>(S));
}
else
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXOperatorCallExprClass: {
const auto *OCE = cast<CXXOperatorCallExpr>(S);
// For instance method operators, make sure the 'this' argument has a
// valid region.
const Decl *Callee = OCE->getCalleeDecl();
if (const auto *MD = dyn_cast_or_null<CXXMethodDecl>(Callee)) {
if (MD->isInstance()) {
ProgramStateRef State = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
ProgramStateRef NewState =
createTemporaryRegionIfNeeded(State, LCtx, OCE->getArg(0));
if (NewState != State) {
Pred = Bldr.generateNode(OCE, Pred, NewState, /*tag=*/nullptr,
ProgramPoint::PreStmtKind);
// Did we cache out?
if (!Pred)
break;
}
}
}
// FALLTHROUGH
LLVM_FALLTHROUGH;
}
case Stmt::CallExprClass:
case Stmt::CXXMemberCallExprClass:
case Stmt::UserDefinedLiteralClass:
Bldr.takeNodes(Pred);
VisitCallExpr(cast<CallExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXCatchStmtClass:
Bldr.takeNodes(Pred);
VisitCXXCatchStmt(cast<CXXCatchStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXTemporaryObjectExprClass:
case Stmt::CXXConstructExprClass:
Bldr.takeNodes(Pred);
VisitCXXConstructExpr(cast<CXXConstructExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXInheritedCtorInitExprClass:
Bldr.takeNodes(Pred);
VisitCXXInheritedCtorInitExpr(cast<CXXInheritedCtorInitExpr>(S), Pred,
Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CXXNewExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (const auto i : PreVisit)
VisitCXXNewExpr(cast<CXXNewExpr>(S), i, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXDeleteExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
const auto *CDE = cast<CXXDeleteExpr>(S);
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
getCheckerManager().runCheckersForPostStmt(PostVisit, PreVisit, S, *this);
for (const auto i : PostVisit)
VisitCXXDeleteExpr(CDE, i, Dst);
Bldr.addNodes(Dst);
break;
}
// FIXME: ChooseExpr is really a constant. We need to fix the CFG so that it
// does not model these as explicit control-flow.
case Stmt::ChooseExprClass: { // __builtin_choose_expr
Bldr.takeNodes(Pred);
const auto *C = cast<ChooseExpr>(S);
VisitGuardedExpr(C, C->getLHS(), C->getRHS(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CompoundAssignOperatorClass:
Bldr.takeNodes(Pred);
VisitBinaryOperator(cast<BinaryOperator>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::CompoundLiteralExprClass:
Bldr.takeNodes(Pred);
VisitCompoundLiteralExpr(cast<CompoundLiteralExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::BinaryConditionalOperatorClass:
case Stmt::ConditionalOperatorClass: { // '?' operator
Bldr.takeNodes(Pred);
const auto *C = cast<AbstractConditionalOperator>(S);
VisitGuardedExpr(C, C->getTrueExpr(), C->getFalseExpr(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::CXXThisExprClass:
Bldr.takeNodes(Pred);
VisitCXXThisExpr(cast<CXXThisExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::DeclRefExprClass: {
Bldr.takeNodes(Pred);
const auto *DE = cast<DeclRefExpr>(S);
VisitCommonDeclRefExpr(DE, DE->getDecl(), Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::DeclStmtClass:
Bldr.takeNodes(Pred);
VisitDeclStmt(cast<DeclStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ImplicitCastExprClass:
case Stmt::CStyleCastExprClass:
case Stmt::CXXStaticCastExprClass:
case Stmt::CXXDynamicCastExprClass:
case Stmt::CXXReinterpretCastExprClass:
case Stmt::CXXConstCastExprClass:
case Stmt::CXXFunctionalCastExprClass:
case Stmt::BuiltinBitCastExprClass:
case Stmt::ObjCBridgedCastExprClass:
case Stmt::CXXAddrspaceCastExprClass: {
Bldr.takeNodes(Pred);
const auto *C = cast<CastExpr>(S);
ExplodedNodeSet dstExpr;
VisitCast(C, C->getSubExpr(), Pred, dstExpr);
// Handle the postvisit checks.
getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, C, *this);
Bldr.addNodes(Dst);
break;
}
case Expr::MaterializeTemporaryExprClass: {
Bldr.takeNodes(Pred);
const auto *MTE = cast<MaterializeTemporaryExpr>(S);
ExplodedNodeSet dstPrevisit;
getCheckerManager().runCheckersForPreStmt(dstPrevisit, Pred, MTE, *this);
ExplodedNodeSet dstExpr;
for (const auto i : dstPrevisit)
CreateCXXTemporaryObject(MTE, i, dstExpr);
getCheckerManager().runCheckersForPostStmt(Dst, dstExpr, MTE, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::InitListExprClass:
Bldr.takeNodes(Pred);
VisitInitListExpr(cast<InitListExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::MemberExprClass:
Bldr.takeNodes(Pred);
VisitMemberExpr(cast<MemberExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::AtomicExprClass:
Bldr.takeNodes(Pred);
VisitAtomicExpr(cast<AtomicExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCIvarRefExprClass:
Bldr.takeNodes(Pred);
VisitLvalObjCIvarRefExpr(cast<ObjCIvarRefExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCForCollectionStmtClass:
Bldr.takeNodes(Pred);
VisitObjCForCollectionStmt(cast<ObjCForCollectionStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCMessageExprClass:
Bldr.takeNodes(Pred);
VisitObjCMessage(cast<ObjCMessageExpr>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::ObjCAtThrowStmtClass:
case Stmt::CXXThrowExprClass:
// FIXME: This is not complete. We basically treat @throw as
// an abort.
Bldr.generateSink(S, Pred, Pred->getState());
break;
case Stmt::ReturnStmtClass:
Bldr.takeNodes(Pred);
VisitReturnStmt(cast<ReturnStmt>(S), Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::OffsetOfExprClass: {
Bldr.takeNodes(Pred);
ExplodedNodeSet PreVisit;
getCheckerManager().runCheckersForPreStmt(PreVisit, Pred, S, *this);
ExplodedNodeSet PostVisit;
for (const auto Node : PreVisit)
VisitOffsetOfExpr(cast<OffsetOfExpr>(S), Node, PostVisit);
getCheckerManager().runCheckersForPostStmt(Dst, PostVisit, S, *this);
Bldr.addNodes(Dst);
break;
}
case Stmt::UnaryExprOrTypeTraitExprClass:
Bldr.takeNodes(Pred);
VisitUnaryExprOrTypeTraitExpr(cast<UnaryExprOrTypeTraitExpr>(S),
Pred, Dst);
Bldr.addNodes(Dst);
break;
case Stmt::StmtExprClass: {
const auto *SE = cast<StmtExpr>(S);
if (SE->getSubStmt()->body_empty()) {
// Empty statement expression.
assert(SE->getType() == getContext().VoidTy
&& "Empty statement expression must have void type.");
break;
}
if (const auto *LastExpr =
dyn_cast<Expr>(*SE->getSubStmt()->body_rbegin())) {
ProgramStateRef state = Pred->getState();
Bldr.generateNode(SE, Pred,
state->BindExpr(SE, Pred->getLocationContext(),
state->getSVal(LastExpr,
Pred->getLocationContext())));
}
break;
}
case Stmt::UnaryOperatorClass: {
Bldr.takeNodes(Pred);
const auto *U = cast<UnaryOperator>(S);
if (AMgr.options.ShouldEagerlyAssume && (U->getOpcode() == UO_LNot)) {
ExplodedNodeSet Tmp;
VisitUnaryOperator(U, Pred, Tmp);
evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, U);
}
else
VisitUnaryOperator(U, Pred, Dst);
Bldr.addNodes(Dst);
break;
}
case Stmt::PseudoObjectExprClass: {
Bldr.takeNodes(Pred);
ProgramStateRef state = Pred->getState();
const auto *PE = cast<PseudoObjectExpr>(S);
if (const Expr *Result = PE->getResultExpr()) {
SVal V = state->getSVal(Result, Pred->getLocationContext());
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(), V));
}
else
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(),
UnknownVal()));
Bldr.addNodes(Dst);
break;
}
case Expr::ObjCIndirectCopyRestoreExprClass: {
// ObjCIndirectCopyRestoreExpr implies passing a temporary for
// correctness of lifetime management. Due to limited analysis
// of ARC, this is implemented as direct arg passing.
Bldr.takeNodes(Pred);
ProgramStateRef state = Pred->getState();
const auto *OIE = cast<ObjCIndirectCopyRestoreExpr>(S);
const Expr *E = OIE->getSubExpr();
SVal V = state->getSVal(E, Pred->getLocationContext());
Bldr.generateNode(S, Pred,
state->BindExpr(S, Pred->getLocationContext(), V));
Bldr.addNodes(Dst);
break;
}
}
}
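/// When inlining a call has exhausted the analysis budget, walk back to the
/// node right before the call was processed and re-enqueue it with the
/// ReplayWithoutInlining flag set, so that the call is re-evaluated
/// conservatively. Returns false if no such node could be found.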
bool ExprEngine::replayWithoutInlining(ExplodedNode *N,
const LocationContext *CalleeLC) {
const StackFrameContext *CalleeSF = CalleeLC->getStackFrame();
const StackFrameContext *CallerSF = CalleeSF->getParent()->getStackFrame();
assert(CalleeSF && CallerSF);
ExplodedNode *BeforeProcessingCall = nullptr;
const Stmt *CE = CalleeSF->getCallSite();
// Find the first node before we started processing the call expression.
while (N) {
ProgramPoint L = N->getLocation();
BeforeProcessingCall = N;
N = N->pred_empty() ? nullptr : *(N->pred_begin());
// Skip the nodes corresponding to the inlined code.
if (L.getStackFrame() != CallerSF)
continue;
// We reached the caller. Find the node right before we started
// processing the call.
if (L.isPurgeKind())
continue;
if (L.getAs<PreImplicitCall>())
continue;
if (L.getAs<CallEnter>())
continue;
if (Optional<StmtPoint> SP = L.getAs<StmtPoint>())
if (SP->getStmt() == CE)
continue;
break;
}
if (!BeforeProcessingCall)
return false;
// TODO: Clean up the unneeded nodes.
// Build an Epsilon node from which we will restart the analysis.
// Note that CE is permitted to be NULL!
ProgramPoint NewNodeLoc =
EpsilonPoint(BeforeProcessingCall->getLocationContext(), CE);
// Add the special flag to GDM to signal retrying with no inlining.
// Note, changing the state ensures that we are not going to cache out.
ProgramStateRef NewNodeState = BeforeProcessingCall->getState();
NewNodeState =
NewNodeState->set<ReplayWithoutInlining>(const_cast<Stmt *>(CE));
// Make the new node a successor of BeforeProcessingCall.
bool IsNew = false;
ExplodedNode *NewNode = G.getNode(NewNodeLoc, NewNodeState, false, &IsNew);
// We cached out at this point. Caching out is common due to us backtracking
// from the inlined function, which might spawn several paths.
if (!IsNew)
return true;
NewNode->addPredecessor(BeforeProcessingCall, G);
// Add the new node to the work list.
Engine.enqueueStmtNode(NewNode, CalleeSF->getCallSiteBlock(),
CalleeSF->getIndex());
NumTimesRetriedWithoutInlining++;
return true;
}
/// Block entrance. (Update counters).
void ExprEngine::processCFGBlockEntrance(const BlockEdge &L,
NodeBuilderWithSinks &nodeBuilder,
ExplodedNode *Pred) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
// If we reach a loop which has a known bound (and meets
// other constraints) then consider completely unrolling it.
if(AMgr.options.ShouldUnrollLoops) {
unsigned maxBlockVisitOnPath = AMgr.options.maxBlockVisitOnPath;
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
if (Term) {
ProgramStateRef NewState = updateLoopStack(Term, AMgr.getASTContext(),
Pred, maxBlockVisitOnPath);
if (NewState != Pred->getState()) {
ExplodedNode *UpdatedNode = nodeBuilder.generateNode(NewState, Pred);
if (!UpdatedNode)
return;
Pred = UpdatedNode;
}
}
// If we are inside an unrolled loop then there is no need to check the counters.
if(isUnrolledState(Pred->getState()))
return;
}
// If this block is terminated by a loop and it has already been visited the
// maximum number of times, widen the loop.
unsigned int BlockCount = nodeBuilder.getContext().blockCount();
if (BlockCount == AMgr.options.maxBlockVisitOnPath - 1 &&
AMgr.options.ShouldWidenLoops) {
const Stmt *Term = nodeBuilder.getContext().getBlock()->getTerminatorStmt();
if (!isa_and_nonnull<ForStmt, WhileStmt, DoStmt>(Term))
return;
// Widen.
const LocationContext *LCtx = Pred->getLocationContext();
ProgramStateRef WidenedState =
getWidenedLoopState(Pred->getState(), LCtx, BlockCount, Term);
nodeBuilder.generateNode(WidenedState, Pred);
return;
}
// FIXME: Refactor this into a checker.
if (BlockCount >= AMgr.options.maxBlockVisitOnPath) {
static SimpleProgramPointTag tag(TagProviderName, "Block count exceeded");
const ExplodedNode *Sink =
nodeBuilder.generateSink(Pred->getState(), Pred, &tag);
// Check if we stopped at the top level function or not.
// Root node should have the location context of the top most function.
const LocationContext *CalleeLC = Pred->getLocation().getLocationContext();
const LocationContext *CalleeSF = CalleeLC->getStackFrame();
const LocationContext *RootLC =
(*G.roots_begin())->getLocation().getLocationContext();
if (RootLC->getStackFrame() != CalleeSF) {
Engine.FunctionSummaries->markReachedMaxBlockCount(CalleeSF->getDecl());
// Re-run the call evaluation without inlining it, by storing the
// no-inlining policy in the state and enqueuing the new work item on
// the list. Replay should almost never fail. Use the stats to catch it
// if it does.
if ((!AMgr.options.NoRetryExhausted &&
replayWithoutInlining(Pred, CalleeLC)))
return;
NumMaxBlockCountReachedInInlined++;
} else
NumMaxBlockCountReached++;
// Mark sink nodes as exhausted (for stats) only if the retry failed.
Engine.blocksExhausted.push_back(std::make_pair(L, Sink));
}
}
//===----------------------------------------------------------------------===//
// Branch processing.
//===----------------------------------------------------------------------===//
/// RecoverCastedSymbol - A helper function for ProcessBranch that is used
/// to try to recover some path-sensitivity for casts of symbolic
/// integers that promote their values (which are currently not tracked well).
/// This function returns the SVal bound to Condition->IgnoreCasts if all the
/// cast(s) did was sign-extend the original value.
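///
/// For example, for a branch on '(int)c' where 'c' is a symbolic 'char', the
/// cast only promotes the value, so the SVal bound to 'c' itself can be used
/// to constrain the condition.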
static SVal RecoverCastedSymbol(ProgramStateRef state,
const Stmt *Condition,
const LocationContext *LCtx,
ASTContext &Ctx) {
const auto *Ex = dyn_cast<Expr>(Condition);
if (!Ex)
return UnknownVal();
uint64_t bits = 0;
bool bitsInit = false;
while (const auto *CE = dyn_cast<CastExpr>(Ex)) {
QualType T = CE->getType();
if (!T->isIntegralOrEnumerationType())
return UnknownVal();
uint64_t newBits = Ctx.getTypeSize(T);
if (!bitsInit || newBits < bits) {
bitsInit = true;
bits = newBits;
}
Ex = CE->getSubExpr();
}
// We reached a non-cast. Is it a symbolic value?
QualType T = Ex->getType();
if (!bitsInit || !T->isIntegralOrEnumerationType() ||
Ctx.getTypeSize(T) > bits)
return UnknownVal();
return state->getSVal(Ex, LCtx);
}
#ifndef NDEBUG
static const Stmt *getRightmostLeaf(const Stmt *Condition) {
while (Condition) {
const auto *BO = dyn_cast<BinaryOperator>(Condition);
if (!BO || !BO->isLogicalOp()) {
return Condition;
}
Condition = BO->getRHS()->IgnoreParens();
}
return nullptr;
}
#endif
// Returns the condition the branch at the end of 'B' depends on and whose value
// has been evaluated within 'B'.
// In most cases, the terminator condition of 'B' will be evaluated fully in
// the last statement of 'B'; in those cases, the resolved condition is the
// given 'Condition'.
// If the condition of the branch is a logical binary operator tree, the CFG is
// optimized: in that case, we know that the expression formed by all but the
// rightmost leaf of the logical binary operator tree must be true, and thus
// the branch condition is at this point equivalent to the truth value of that
// rightmost leaf; the CFG block thus only evaluates this rightmost leaf
// expression in its final statement. As the full condition in that case was
// not evaluated, and is thus not in the SVal cache, we need to use that leaf
// expression to evaluate the truth value of the condition in the current state
// space.
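// For example, given 'if (a && b)', the block that feeds the branch only
// evaluates 'b' (short-circuit evaluation already guaranteed that 'a' was
// true on this path), so 'b' is the statement whose SVal must be consulted.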
static const Stmt *ResolveCondition(const Stmt *Condition,
const CFGBlock *B) {
if (const auto *Ex = dyn_cast<Expr>(Condition))
Condition = Ex->IgnoreParens();
const auto *BO = dyn_cast<BinaryOperator>(Condition);
if (!BO || !BO->isLogicalOp())
return Condition;
assert(B->getTerminator().isStmtBranch() &&
"Other kinds of branches are handled separately!");
// For logical operations, we still have the case where some branches
// use the traditional "merge" approach and others sink the branch
// directly into the basic blocks representing the logical operation.
// We need to distinguish between those two cases here.
// The invariants are still shifting, but it is possible that the
// last element in a CFGBlock is not a CFGStmt. Look for the last
// CFGStmt as the value of the condition.
CFGBlock::const_reverse_iterator I = B->rbegin(), E = B->rend();
for (; I != E; ++I) {
CFGElement Elem = *I;
Optional<CFGStmt> CS = Elem.getAs<CFGStmt>();
if (!CS)
continue;
const Stmt *LastStmt = CS->getStmt();
assert(LastStmt == Condition || LastStmt == getRightmostLeaf(Condition));
return LastStmt;
}
llvm_unreachable("could not resolve condition");
}
using ObjCForLctxPair =
std::pair<const ObjCForCollectionStmt *, const LocationContext *>;
REGISTER_MAP_WITH_PROGRAMSTATE(ObjCForHasMoreIterations, ObjCForLctxPair, bool)
ProgramStateRef ExprEngine::setWhetherHasMoreIteration(
ProgramStateRef State, const ObjCForCollectionStmt *O,
const LocationContext *LC, bool HasMoreIteration) {
assert(!State->contains<ObjCForHasMoreIterations>({O, LC}));
return State->set<ObjCForHasMoreIterations>({O, LC}, HasMoreIteration);
}
ProgramStateRef
ExprEngine::removeIterationState(ProgramStateRef State,
const ObjCForCollectionStmt *O,
const LocationContext *LC) {
assert(State->contains<ObjCForHasMoreIterations>({O, LC}));
return State->remove<ObjCForHasMoreIterations>({O, LC});
}
bool ExprEngine::hasMoreIteration(ProgramStateRef State,
const ObjCForCollectionStmt *O,
const LocationContext *LC) {
assert(State->contains<ObjCForHasMoreIterations>({O, LC}));
return *State->get<ObjCForHasMoreIterations>({O, LC});
}
/// Split the state on whether there are any more iterations left for this loop.
/// Returns a (HasMoreIteration, HasNoMoreIteration) pair, or None when the
/// acquisition of the loop condition value failed.
static Optional<std::pair<ProgramStateRef, ProgramStateRef>>
assumeCondition(const Stmt *Condition, ExplodedNode *N) {
ProgramStateRef State = N->getState();
if (const auto *ObjCFor = dyn_cast<ObjCForCollectionStmt>(Condition)) {
bool HasMoreIteration =
ExprEngine::hasMoreIteration(State, ObjCFor, N->getLocationContext());
// Checkers have already run on branch conditions, so the current
// information as to whether the loop has more iterations becomes outdated
// after this point.
State = ExprEngine::removeIterationState(State, ObjCFor,
N->getLocationContext());
if (HasMoreIteration)
return std::pair<ProgramStateRef, ProgramStateRef>{State, nullptr};
else
return std::pair<ProgramStateRef, ProgramStateRef>{nullptr, State};
}
SVal X = State->getSVal(Condition, N->getLocationContext());
if (X.isUnknownOrUndef()) {
// Give it a chance to recover from unknown.
if (const auto *Ex = dyn_cast<Expr>(Condition)) {
if (Ex->getType()->isIntegralOrEnumerationType()) {
// Try to recover some path-sensitivity. Right now casts of symbolic
// integers that promote their values are currently not tracked well.
// If 'Condition' is such an expression, try and recover the
// underlying value and use that instead.
SVal recovered =
RecoverCastedSymbol(State, Condition, N->getLocationContext(),
N->getState()->getStateManager().getContext());
if (!recovered.isUnknown()) {
X = recovered;
}
}
}
}
// If the condition is still unknown, give up.
if (X.isUnknownOrUndef())
return None;
DefinedSVal V = X.castAs<DefinedSVal>();
ProgramStateRef StTrue, StFalse;
return State->assume(V);
}
void ExprEngine::processBranch(const Stmt *Condition,
NodeBuilderContext& BldCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
assert((!Condition || !isa<CXXBindTemporaryExpr>(Condition)) &&
"CXXBindTemporaryExprs are handled by processBindTemporary.");
const LocationContext *LCtx = Pred->getLocationContext();
PrettyStackTraceLocationContext StackCrashInfo(LCtx);
currBldrCtx = &BldCtx;
// Check for NULL conditions; e.g. "for(;;)"
if (!Condition) {
BranchNodeBuilder NullCondBldr(Pred, Dst, BldCtx, DstT, DstF);
NullCondBldr.markInfeasible(false);
NullCondBldr.generateNode(Pred->getState(), true, Pred);
return;
}
if (const auto *Ex = dyn_cast<Expr>(Condition))
Condition = Ex->IgnoreParens();
Condition = ResolveCondition(Condition, BldCtx.getBlock());
PrettyStackTraceLoc CrashInfo(getContext().getSourceManager(),
Condition->getBeginLoc(),
"Error evaluating branch");
ExplodedNodeSet CheckersOutSet;
getCheckerManager().runCheckersForBranchCondition(Condition, CheckersOutSet,
Pred, *this);
// We generated only sinks.
if (CheckersOutSet.empty())
return;
BranchNodeBuilder builder(CheckersOutSet, Dst, BldCtx, DstT, DstF);
for (ExplodedNode *PredN : CheckersOutSet) {
if (PredN->isSink())
continue;
ProgramStateRef PrevState = PredN->getState();
ProgramStateRef StTrue, StFalse;
if (const auto KnownCondValueAssumption = assumeCondition(Condition, PredN))
std::tie(StTrue, StFalse) = *KnownCondValueAssumption;
else {
assert(!isa<ObjCForCollectionStmt>(Condition));
builder.generateNode(PrevState, true, PredN);
builder.generateNode(PrevState, false, PredN);
continue;
}
if (StTrue && StFalse)
assert(!isa<ObjCForCollectionStmt>(Condition));
// Process the true branch.
if (builder.isFeasible(true)) {
if (StTrue)
builder.generateNode(StTrue, true, PredN);
else
builder.markInfeasible(true);
}
// Process the false branch.
if (builder.isFeasible(false)) {
if (StFalse)
builder.generateNode(StFalse, false, PredN);
else
builder.markInfeasible(false);
}
}
currBldrCtx = nullptr;
}
/// The GDM component containing the set of global variables which have been
/// previously initialized with explicit initializers.
REGISTER_TRAIT_WITH_PROGRAMSTATE(InitializedGlobalsSet,
llvm::ImmutableSet<const VarDecl *>)
void ExprEngine::processStaticInitializer(const DeclStmt *DS,
NodeBuilderContext &BuilderCtx,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const CFGBlock *DstT,
const CFGBlock *DstF) {
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
currBldrCtx = &BuilderCtx;
const auto *VD = cast<VarDecl>(DS->getSingleDecl());
ProgramStateRef state = Pred->getState();
bool initHasRun = state->contains<InitializedGlobalsSet>(VD);
BranchNodeBuilder builder(Pred, Dst, BuilderCtx, DstT, DstF);
if (!initHasRun) {
state = state->add<InitializedGlobalsSet>(VD);
}
builder.generateNode(state, initHasRun, Pred);
builder.markInfeasible(!initHasRun);
currBldrCtx = nullptr;
}
/// processIndirectGoto - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a computed goto jump.
void ExprEngine::processIndirectGoto(IndirectGotoNodeBuilder &builder) {
ProgramStateRef state = builder.getState();
SVal V = state->getSVal(builder.getTarget(), builder.getLocationContext());
// Three possibilities:
//
// (1) We know the computed label.
// (2) The label is NULL (or some other constant), or Undefined.
// (3) We have no clue about the label. Dispatch to all targets.
//
using iterator = IndirectGotoNodeBuilder::iterator;
if (Optional<loc::GotoLabel> LV = V.getAs<loc::GotoLabel>()) {
const LabelDecl *L = LV->getLabel();
for (iterator I = builder.begin(), E = builder.end(); I != E; ++I) {
if (I.getLabel() == L) {
builder.generateNode(I, state);
return;
}
}
llvm_unreachable("No block with label.");
}
if (isa<UndefinedVal, loc::ConcreteInt>(V)) {
// Dispatch to the first target and mark it as a sink.
//ExplodedNode* N = builder.generateNode(builder.begin(), state, true);
// FIXME: add checker visit.
// UndefBranches.insert(N);
return;
}
// This is really a catch-all. We don't support symbolics yet.
// FIXME: Implement dispatch for symbolic pointers.
for (iterator I = builder.begin(), E = builder.end(); I != E; ++I)
builder.generateNode(I, state);
}
void ExprEngine::processBeginOfFunction(NodeBuilderContext &BC,
ExplodedNode *Pred,
ExplodedNodeSet &Dst,
const BlockEdge &L) {
SaveAndRestore<const NodeBuilderContext *> NodeContextRAII(currBldrCtx, &BC);
getCheckerManager().runCheckersForBeginFunction(Dst, L, Pred, *this);
}
/// processEndOfFunction - Called by CoreEngine. Used to generate end-of-path
/// nodes when control reaches the end of a function.
void ExprEngine::processEndOfFunction(NodeBuilderContext& BC,
ExplodedNode *Pred,
const ReturnStmt *RS) {
ProgramStateRef State = Pred->getState();
if (!Pred->getStackFrame()->inTopFrame())
State = finishArgumentConstruction(
State, *getStateManager().getCallEventManager().getCaller(
Pred->getStackFrame(), Pred->getState()));
// FIXME: We currently cannot assert that temporaries are clear, because
// lifetime-extended temporaries are not always modelled correctly. In some
// cases when we materialize the temporary, we do
// createTemporaryRegionIfNeeded(), the region changes, and the respective
// destructor changes from a temporary destructor to an automatic one. So
// for now, clean up the state manually before asserting; ideally, this
// braced block of code should go away.
{
const LocationContext *FromLC = Pred->getLocationContext();
const LocationContext *ToLC = FromLC->getStackFrame()->getParent();
const LocationContext *LC = FromLC;
while (LC != ToLC) {
assert(LC && "ToLC must be a parent of FromLC!");
for (auto I : State->get<ObjectsUnderConstruction>())
if (I.first.getLocationContext() == LC) {
// The comment above only pardons us for not cleaning up a
// temporary destructor. If any other statements are found here,
// it must be a separate problem.
assert(I.first.getItem().getKind() ==
ConstructionContextItem::TemporaryDestructorKind ||
I.first.getItem().getKind() ==
ConstructionContextItem::ElidedDestructorKind);
State = State->remove<ObjectsUnderConstruction>(I.first);
}
LC = LC->getParent();
}
}
// Perform the transition with cleanups.
if (State != Pred->getState()) {
ExplodedNodeSet PostCleanup;
NodeBuilder Bldr(Pred, PostCleanup, BC);
Pred = Bldr.generateNode(Pred->getLocation(), State, Pred);
if (!Pred) {
// The node with clean temporaries already exists. We might have reached
// it on a path on which we initialize different temporaries.
return;
}
}
assert(areAllObjectsFullyConstructed(Pred->getState(),
Pred->getLocationContext(),
Pred->getStackFrame()->getParent()));
PrettyStackTraceLocationContext CrashInfo(Pred->getLocationContext());
ExplodedNodeSet Dst;
if (Pred->getLocationContext()->inTopFrame()) {
// Remove dead symbols.
ExplodedNodeSet AfterRemovedDead;
removeDeadOnEndOfFunction(BC, Pred, AfterRemovedDead);
// Notify checkers.
for (const auto I : AfterRemovedDead)
getCheckerManager().runCheckersForEndFunction(BC, Dst, I, *this, RS);
} else {
getCheckerManager().runCheckersForEndFunction(BC, Dst, Pred, *this, RS);
}
Engine.enqueueEndOfFunction(Dst, RS);
}
/// ProcessSwitch - Called by CoreEngine. Used to generate successor
/// nodes by processing the 'effects' of a switch statement.
void ExprEngine::processSwitch(SwitchNodeBuilder& builder) {
using iterator = SwitchNodeBuilder::iterator;
ProgramStateRef state = builder.getState();
const Expr *CondE = builder.getCondition();
SVal CondV_untested = state->getSVal(CondE, builder.getLocationContext());
if (CondV_untested.isUndef()) {
//ExplodedNode* N = builder.generateDefaultCaseNode(state, true);
// FIXME: add checker
//UndefBranches.insert(N);
return;
}
DefinedOrUnknownSVal CondV = CondV_untested.castAs<DefinedOrUnknownSVal>();
ProgramStateRef DefaultSt = state;
iterator I = builder.begin(), EI = builder.end();
bool defaultIsFeasible = I == EI;
for ( ; I != EI; ++I) {
// Successor may be pruned out during CFG construction.
if (!I.getBlock())
continue;
const CaseStmt *Case = I.getCase();
// Evaluate the LHS of the case value.
llvm::APSInt V1 = Case->getLHS()->EvaluateKnownConstInt(getContext());
assert(V1.getBitWidth() == getContext().getIntWidth(CondE->getType()));
// Get the RHS of the case, if it exists.
llvm::APSInt V2;
if (const Expr *E = Case->getRHS())
V2 = E->EvaluateKnownConstInt(getContext());
else
V2 = V1;
ProgramStateRef StateCase;
if (Optional<NonLoc> NL = CondV.getAs<NonLoc>())
std::tie(StateCase, DefaultSt) =
DefaultSt->assumeInclusiveRange(*NL, V1, V2);
else // UnknownVal
StateCase = DefaultSt;
if (StateCase)
builder.generateCaseStmtNode(I, StateCase);
// Now "assume" that the case doesn't match. Add this state
// to the default state (if it is feasible).
if (DefaultSt)
defaultIsFeasible = true;
else {
defaultIsFeasible = false;
break;
}
}
if (!defaultIsFeasible)
return;
// If we have switch(enum value), the default branch is not
// feasible if all of the enum constants not covered by 'case:' statements
// are not feasible values for the switch condition.
//
// Note that this isn't as accurate as it could be. Even if there isn't
// a case for a particular enum value, as long as that enum value isn't
// feasible, it shouldn't be considered for making 'default:' reachable.
const SwitchStmt *SS = builder.getSwitch();
const Expr *CondExpr = SS->getCond()->IgnoreParenImpCasts();
if (CondExpr->getType()->getAs<EnumType>()) {
if (SS->isAllEnumCasesCovered())
return;
}
builder.generateDefaultCaseNode(DefaultSt);
}
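// Example (illustrative) for processSwitch: given
//   enum E { A, B }; switch (e) { case A: ...; case B: ...; }
// isAllEnumCasesCovered() is true, so generateDefaultCaseNode() is skipped
// and the 'default' branch is treated as infeasible.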
//===----------------------------------------------------------------------===//
// Transfer functions: Loads and stores.
//===----------------------------------------------------------------------===//
void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
ProgramStateRef state = Pred->getState();
const LocationContext *LCtx = Pred->getLocationContext();
if (const auto *VD = dyn_cast<VarDecl>(D)) {
// C permits "extern void v", and if you cast the address to a valid type,
// you can even do things with it. We simply pretend
assert(Ex->isGLValue() || VD->getType()->isVoidType());
const LocationContext *LocCtxt = Pred->getLocationContext();
const Decl *D = LocCtxt->getDecl();
const auto *MD = dyn_cast_or_null<CXXMethodDecl>(D);
const auto *DeclRefEx = dyn_cast<DeclRefExpr>(Ex);
Optional<std::pair<SVal, QualType>> VInfo;
if (AMgr.options.ShouldInlineLambdas && DeclRefEx &&
DeclRefEx->refersToEnclosingVariableOrCapture() && MD &&
MD->getParent()->isLambda()) {
// Lookup the field of the lambda.
const CXXRecordDecl *CXXRec = MD->getParent();
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField;
CXXRec->getCaptureFields(LambdaCaptureFields, LambdaThisCaptureField);
// Sema follows a sequence of complex rules to determine whether the
// variable should be captured.
if (const FieldDecl *FD = LambdaCaptureFields[VD]) {
Loc CXXThis =
svalBuilder.getCXXThis(MD, LocCtxt->getStackFrame());
SVal CXXThisVal = state->getSVal(CXXThis);
VInfo = std::make_pair(state->getLValue(FD, CXXThisVal), FD->getType());
}
}
if (!VInfo)
VInfo = std::make_pair(state->getLValue(VD, LocCtxt), VD->getType());
SVal V = VInfo->first;
bool IsReference = VInfo->second->isReferenceType();
// For references, the 'lvalue' is the pointer address stored in the
// reference region.
if (IsReference) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (const auto *ED = dyn_cast<EnumConstantDecl>(D)) {
assert(!Ex->isGLValue());
SVal V = svalBuilder.makeIntVal(ED->getInitVal());
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V));
return;
}
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
SVal V = svalBuilder.getFunctionPointer(FD);
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
if (isa<FieldDecl, IndirectFieldDecl>(D)) {
// Delegate all work related to pointer to members to the surrounding
// operator&.
return;
}
if (const auto *BD = dyn_cast<BindingDecl>(D)) {
const auto *DD = cast<DecompositionDecl>(BD->getDecomposedDecl());
SVal Base = state->getLValue(DD, LCtx);
if (DD->getType()->isReferenceType()) {
if (const MemRegion *R = Base.getAsRegion())
Base = state->getSVal(R);
else
Base = UnknownVal();
}
SVal V = UnknownVal();
// Handle binding to data members
if (const auto *ME = dyn_cast<MemberExpr>(BD->getBinding())) {
const auto *Field = cast<FieldDecl>(ME->getMemberDecl());
V = state->getLValue(Field, Base);
}
// Handle binding to arrays
else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(BD->getBinding())) {
SVal Idx = state->getSVal(ASE->getIdx(), LCtx);
// Note: the index of an element in a structured binding is synthesized by
// the compiler and uniquely identifies the specific element, so it cannot
// be a value that varies at runtime.
assert(Idx.isConstant() && "BindingDecl array index is not a constant!");
V = state->getLValue(BD->getType(), Idx, Base);
}
// Handle binding to tuple-like structures
else if (const auto *HV = BD->getHoldingVar()) {
V = state->getLValue(HV, LCtx);
if (HV->getType()->isReferenceType()) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
} else
llvm_unreachable("An unknown case of structured binding encountered!");
// In case of tuple-like types the references are already handled, so we
// don't want to handle them again.
if (BD->getType()->isReferenceType() && !BD->getHoldingVar()) {
if (const MemRegion *R = V.getAsRegion())
V = state->getSVal(R);
else
V = UnknownVal();
}
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
return;
}
+ if (const auto *TPO = dyn_cast<TemplateParamObjectDecl>(D)) {
+ // FIXME: We should meaningfully implement this.
+ (void)TPO;
+ return;
+ }
+
llvm_unreachable("Support for this Decl not implemented.");
}
/// VisitArrayInitLoopExpr - Transfer function for array init loop.
void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex,
ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ExplodedNodeSet CheckerPreStmt;
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, Ex, *this);
ExplodedNodeSet EvalSet;
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
const Expr *Arr = Ex->getCommonExpr()->getSourceExpr();
for (auto *Node : CheckerPreStmt) {
// The constructor visitor has already taken care of everything.
if (auto *CE = dyn_cast<CXXConstructExpr>(Ex->getSubExpr()))
break;
const LocationContext *LCtx = Node->getLocationContext();
ProgramStateRef state = Node->getState();
SVal Base = UnknownVal();
// Since the sub-expressions of this expression are not visited by any
// other transfer function, they are handled here by matching their AST.
// Case of implicit copy or move ctor of object with array member
//
// Note: ExprEngine::VisitMemberExpr is not able to bind the array to the
// environment.
//
// struct S {
// int arr[2];
// };
//
//
// S a;
// S b = a;
//
// The AST in case of a *copy constructor* looks like this:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-MemberExpr <-- match this
// | `-DeclRefExpr
// ` ...
//
//
// S c;
// S d = std::move(c);
//
// In case of a *move constructor* the resulting AST looks like:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-MemberExpr <-- match this first
// | `-CXXStaticCastExpr <-- match this after
// | `-DeclRefExpr
// ` ...
if (const auto *ME = dyn_cast<MemberExpr>(Arr)) {
Expr *MEBase = ME->getBase();
// Move ctor
if (auto CXXSCE = dyn_cast<CXXStaticCastExpr>(MEBase)) {
MEBase = CXXSCE->getSubExpr();
}
auto ObjDeclExpr = cast<DeclRefExpr>(MEBase);
SVal Obj = state->getLValue(cast<VarDecl>(ObjDeclExpr->getDecl()), LCtx);
Base = state->getLValue(cast<FieldDecl>(ME->getMemberDecl()), Obj);
}
// Case of lambda capture and decomposition declaration
//
// int arr[2];
//
// [arr]{ int a = arr[0]; }();
// auto[a, b] = arr;
//
// In both of these cases the AST looks like the following:
// ArrayInitLoopExpr
// |-OpaqueValueExpr
// | `-DeclRefExpr <-- match this
// ` ...
if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(Arr))
Base = state->getLValue(cast<VarDecl>(DRE->getDecl()), LCtx);
// Create a lazy compound value for the original array
if (const MemRegion *R = Base.getAsRegion())
Base = state->getSVal(R);
else
Base = UnknownVal();
Bldr.generateNode(Ex, Pred, state->BindExpr(Ex, LCtx, Base));
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, Ex, *this);
}
/// VisitArraySubscriptExpr - Transfer function for array accesses
void ExprEngine::VisitArraySubscriptExpr(const ArraySubscriptExpr *A,
ExplodedNode *Pred,
ExplodedNodeSet &Dst){
const Expr *Base = A->getBase()->IgnoreParens();
const Expr *Idx = A->getIdx()->IgnoreParens();
ExplodedNodeSet CheckerPreStmt;
getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, A, *this);
ExplodedNodeSet EvalSet;
StmtNodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx);
bool IsVectorType = A->getBase()->getType()->isVectorType();
// The "like" case is for situations where C standard prohibits the type to
// be an lvalue, e.g. taking the address of a subscript of an expression of
// type "void *".
bool IsGLValueLike = A->isGLValue() ||
(A->getType().isCForbiddenLValueType() && !AMgr.getLangOpts().CPlusPlus);
for (auto *Node : CheckerPreStmt) {
const LocationContext *LCtx = Node->getLocationContext();
ProgramStateRef state = Node->getState();
if (IsGLValueLike) {
QualType T = A->getType();
// One of the forbidden LValue types! We still need to have sensible
// symbolic locations to represent this stuff. Note that arithmetic on
// void pointers is a GCC extension.
if (T->isVoidType())
T = getContext().CharTy;
SVal V = state->getLValue(T,
state->getSVal(Idx, LCtx),
state->getSVal(Base, LCtx));
Bldr.generateNode(A, Node, state->BindExpr(A, LCtx, V), nullptr,
ProgramPoint::PostLValueKind);
} else if (IsVectorType) {
// FIXME: non-glvalue vector reads are not modelled.
Bldr.generateNode(A, Node, state, nullptr);
} else {
llvm_unreachable("Array subscript should be an lValue when not \
a vector and not a forbidden lvalue type");
}
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, A, *this);
}
/// VisitMemberExpr - Transfer function for member expressions.
void ExprEngine::VisitMemberExpr(const MemberExpr *M, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
// FIXME: Prechecks eventually go in ::Visit().
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForPreStmt(CheckedSet, Pred, M, *this);
ExplodedNodeSet EvalSet;
ValueDecl *Member = M->getMemberDecl();
// Handle static member variables and enum constants accessed via
// member syntax.
if (isa<VarDecl, EnumConstantDecl>(Member)) {
for (const auto I : CheckedSet)
VisitCommonDeclRefExpr(M, Member, I, EvalSet);
} else {
StmtNodeBuilder Bldr(CheckedSet, EvalSet, *currBldrCtx);
ExplodedNodeSet Tmp;
for (const auto I : CheckedSet) {
ProgramStateRef state = I->getState();
const LocationContext *LCtx = I->getLocationContext();
Expr *BaseExpr = M->getBase();
// Handle C++ method calls.
if (const auto *MD = dyn_cast<CXXMethodDecl>(Member)) {
if (MD->isInstance())
state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr);
SVal MDVal = svalBuilder.getFunctionPointer(MD);
state = state->BindExpr(M, LCtx, MDVal);
Bldr.generateNode(M, I, state);
continue;
}
// Handle regular struct fields / member variables.
const SubRegion *MR = nullptr;
state = createTemporaryRegionIfNeeded(state, LCtx, BaseExpr,
/*Result=*/nullptr,
/*OutRegionWithAdjustments=*/&MR);
SVal baseExprVal =
MR ? loc::MemRegionVal(MR) : state->getSVal(BaseExpr, LCtx);
const auto *field = cast<FieldDecl>(Member);
SVal L = state->getLValue(field, baseExprVal);
if (M->isGLValue() || M->getType()->isArrayType()) {
// We special-case rvalues of array type because the analyzer cannot
// reason about them, since we expect all regions to be wrapped in Locs.
// We instead treat these as lvalues and assume that they will decay to
// pointers as soon as they are used.
if (!M->isGLValue()) {
assert(M->getType()->isArrayType());
const auto *PE =
dyn_cast<ImplicitCastExpr>(I->getParentMap().getParentIgnoreParens(M));
if (!PE || PE->getCastKind() != CK_ArrayToPointerDecay) {
llvm_unreachable("should always be wrapped in ArrayToPointerDecay");
}
}
if (field->getType()->isReferenceType()) {
if (const MemRegion *R = L.getAsRegion())
L = state->getSVal(R);
else
L = UnknownVal();
}
Bldr.generateNode(M, I, state->BindExpr(M, LCtx, L), nullptr,
ProgramPoint::PostLValueKind);
} else {
Bldr.takeNodes(I);
evalLoad(Tmp, M, M, I, state, L);
Bldr.addNodes(Tmp);
}
}
}
getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, M, *this);
}
void ExprEngine::VisitAtomicExpr(const AtomicExpr *AE, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
ExplodedNodeSet AfterPreSet;
getCheckerManager().runCheckersForPreStmt(AfterPreSet, Pred, AE, *this);
// For now, treat all the arguments to C11 atomics as escaping.
// FIXME: Ideally we should model the behavior of the atomics precisely here.
ExplodedNodeSet AfterInvalidateSet;
StmtNodeBuilder Bldr(AfterPreSet, AfterInvalidateSet, *currBldrCtx);
for (const auto I : AfterPreSet) {
ProgramStateRef State = I->getState();
const LocationContext *LCtx = I->getLocationContext();
SmallVector<SVal, 8> ValuesToInvalidate;
for (unsigned SI = 0, Count = AE->getNumSubExprs(); SI != Count; SI++) {
const Expr *SubExpr = AE->getSubExprs()[SI];
SVal SubExprVal = State->getSVal(SubExpr, LCtx);
ValuesToInvalidate.push_back(SubExprVal);
}
State = State->invalidateRegions(ValuesToInvalidate, AE,
currBldrCtx->blockCount(),
LCtx,
/*CausedByPointerEscape*/true,
/*Symbols=*/nullptr);
SVal ResultVal = UnknownVal();
State = State->BindExpr(AE, LCtx, ResultVal);
Bldr.generateNode(AE, I, State, nullptr,
ProgramPoint::PostStmtKind);
}
getCheckerManager().runCheckersForPostStmt(Dst, AfterInvalidateSet, AE, *this);
}
// A value escapes in four possible cases:
// (1) We are binding to something that is not a memory region.
// (2) We are binding to a MemRegion that does not have stack storage.
// (3) We are binding to a top-level parameter region with a non-trivial
// destructor. We won't see the destructor during analysis, but it's there.
// (4) We are binding to a MemRegion with stack storage that the store
// does not understand.
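//
// Example (illustrative) of case (2):
//   static int *g;
//   void f() { int x; g = &x; }
// Binding &x into the global 'g' stores it to a non-stack region, so the
// value escapes and checkers are notified.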
ProgramStateRef ExprEngine::processPointerEscapedOnBind(
ProgramStateRef State, ArrayRef<std::pair<SVal, SVal>> LocAndVals,
const LocationContext *LCtx, PointerEscapeKind Kind,
const CallEvent *Call) {
SmallVector<SVal, 8> Escaped;
for (const std::pair<SVal, SVal> &LocAndVal : LocAndVals) {
// Cases (1) and (2).
const MemRegion *MR = LocAndVal.first.getAsRegion();
if (!MR || !MR->hasStackStorage()) {
Escaped.push_back(LocAndVal.second);
continue;
}
// Case (3).
if (const auto *VR = dyn_cast<VarRegion>(MR->getBaseRegion()))
if (VR->hasStackParametersStorage() && VR->getStackFrame()->inTopFrame())
if (const auto *RD = VR->getValueType()->getAsCXXRecordDecl())
if (!RD->hasTrivialDestructor()) {
Escaped.push_back(LocAndVal.second);
continue;
}
// Case (4): in order to test that, generate a new state with the binding
// added. If it is the same state, then it escapes (since the store cannot
// represent the binding).
// Do this only if we know that the store is not supposed to generate the
// same state.
SVal StoredVal = State->getSVal(MR);
if (StoredVal != LocAndVal.second)
if (State ==
(State->bindLoc(loc::MemRegionVal(MR), LocAndVal.second, LCtx)))
Escaped.push_back(LocAndVal.second);
}
if (Escaped.empty())
return State;
return escapeValues(State, Escaped, Kind, Call);
}
ProgramStateRef
ExprEngine::processPointerEscapedOnBind(ProgramStateRef State, SVal Loc,
SVal Val, const LocationContext *LCtx) {
std::pair<SVal, SVal> LocAndVal(Loc, Val);
return processPointerEscapedOnBind(State, LocAndVal, LCtx, PSK_EscapeOnBind,
nullptr);
}
ProgramStateRef
ExprEngine::notifyCheckersOfPointerEscape(ProgramStateRef State,
const InvalidatedSymbols *Invalidated,
ArrayRef<const MemRegion *> ExplicitRegions,
const CallEvent *Call,
RegionAndSymbolInvalidationTraits &ITraits) {
if (!Invalidated || Invalidated->empty())
return State;
if (!Call)
return getCheckerManager().runCheckersForPointerEscape(State,
*Invalidated,
nullptr,
PSK_EscapeOther,
&ITraits);
// If the symbols were invalidated by a call, we want to find out which ones
// were invalidated directly due to being arguments to the call.
InvalidatedSymbols SymbolsDirectlyInvalidated;
for (const auto I : ExplicitRegions) {
if (const SymbolicRegion *R = I->StripCasts()->getAs<SymbolicRegion>())
SymbolsDirectlyInvalidated.insert(R->getSymbol());
}
InvalidatedSymbols SymbolsIndirectlyInvalidated;
for (const auto &sym : *Invalidated) {
if (SymbolsDirectlyInvalidated.count(sym))
continue;
SymbolsIndirectlyInvalidated.insert(sym);
}
if (!SymbolsDirectlyInvalidated.empty())
State = getCheckerManager().runCheckersForPointerEscape(State,
SymbolsDirectlyInvalidated, Call, PSK_DirectEscapeOnCall, &ITraits);
// Notify about the symbols that get indirectly invalidated by the call.
if (!SymbolsIndirectlyInvalidated.empty())
State = getCheckerManager().runCheckersForPointerEscape(State,
SymbolsIndirectlyInvalidated, Call, PSK_IndirectEscapeOnCall, &ITraits);
return State;
}
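// Example (illustrative): for a call `opaque(p)` where 'p' points to a
// symbolic region, the pointee's symbol escapes with PSK_DirectEscapeOnCall;
// other symbols invalidated by the call (e.g. values reachable only through
// that region) escape with PSK_IndirectEscapeOnCall.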
/// evalBind - Handle the semantics of binding a value to a specific location.
/// This method is used by evalStore and (soon) VisitDeclStmt, and others.
void ExprEngine::evalBind(ExplodedNodeSet &Dst, const Stmt *StoreE,
ExplodedNode *Pred,
SVal location, SVal Val,
bool atDeclInit, const ProgramPoint *PP) {
const LocationContext *LC = Pred->getLocationContext();
PostStmt PS(StoreE, LC);
if (!PP)
PP = &PS;
// Do a previsit of the bind.
ExplodedNodeSet CheckedSet;
getCheckerManager().runCheckersForBind(CheckedSet, Pred, location, Val,
StoreE, *this, *PP);
StmtNodeBuilder Bldr(CheckedSet, Dst, *currBldrCtx);
// If the location is not a 'Loc', it will already be handled by
// the checkers. There is nothing left to do.
if (!isa<Loc>(location)) {
const ProgramPoint L = PostStore(StoreE, LC, /*Loc*/nullptr,
/*tag*/nullptr);
ProgramStateRef state = Pred->getState();
state = processPointerEscapedOnBind(state, location, Val, LC);
Bldr.generateNode(L, state, Pred);
return;
}
for (const auto PredI : CheckedSet) {
ProgramStateRef state = PredI->getState();
state = processPointerEscapedOnBind(state, location, Val, LC);
// When binding the value, pass on the hint that this is an initialization.
// For initializations, we do not need to inform clients of region
// changes.
state = state->bindLoc(location.castAs<Loc>(),
Val, LC, /* notifyChanges = */ !atDeclInit);
const MemRegion *LocReg = nullptr;
if (Optional<loc::MemRegionVal> LocRegVal =
location.getAs<loc::MemRegionVal>()) {
LocReg = LocRegVal->getRegion();
}
const ProgramPoint L = PostStore(StoreE, LC, LocReg, nullptr);
Bldr.generateNode(L, state, PredI);
}
}
/// evalStore - Handle the semantics of a store via an assignment.
/// @param Dst The node set to store generated state nodes
/// @param AssignE The assignment expression if the store happens in an
/// assignment.
/// @param LocationE The location expression that is stored to.
/// @param state The current simulation state
/// @param location The location to store the value
/// @param Val The value to be stored
void ExprEngine::evalStore(ExplodedNodeSet &Dst, const Expr *AssignE,
const Expr *LocationE,
ExplodedNode *Pred,
ProgramStateRef state, SVal location, SVal Val,
const ProgramPointTag *tag) {
// Proceed with the store. We use AssignE as the anchor for the PostStore
// ProgramPoint if it is non-NULL, and LocationE otherwise.
const Expr *StoreE = AssignE ? AssignE : LocationE;
// Evaluate the location (checks for bad dereferences).
ExplodedNodeSet Tmp;
evalLocation(Tmp, AssignE, LocationE, Pred, state, location, false);
if (Tmp.empty())
return;
if (location.isUndef())
return;
for (const auto I : Tmp)
evalBind(Dst, StoreE, I, location, Val, false);
}
void ExprEngine::evalLoad(ExplodedNodeSet &Dst,
const Expr *NodeEx,
const Expr *BoundEx,
ExplodedNode *Pred,
ProgramStateRef state,
SVal location,
const ProgramPointTag *tag,
QualType LoadTy) {
assert(!isa<NonLoc>(location) && "location cannot be a NonLoc.");
assert(NodeEx);
assert(BoundEx);
// Evaluate the location (checks for bad dereferences).
ExplodedNodeSet Tmp;
evalLocation(Tmp, NodeEx, BoundEx, Pred, state, location, true);
if (Tmp.empty())
return;
StmtNodeBuilder Bldr(Tmp, Dst, *currBldrCtx);
if (location.isUndef())
return;
// Proceed with the load.
for (const auto I : Tmp) {
state = I->getState();
const LocationContext *LCtx = I->getLocationContext();
SVal V = UnknownVal();
if (location.isValid()) {
if (LoadTy.isNull())
LoadTy = BoundEx->getType();
V = state->getSVal(location.castAs<Loc>(), LoadTy);
}
Bldr.generateNode(NodeEx, I, state->BindExpr(BoundEx, LCtx, V), tag,
ProgramPoint::PostLoadKind);
}
}
void ExprEngine::evalLocation(ExplodedNodeSet &Dst,
const Stmt *NodeEx,
const Stmt *BoundEx,
ExplodedNode *Pred,
ProgramStateRef state,
SVal location,
bool isLoad) {
StmtNodeBuilder BldrTop(Pred, Dst, *currBldrCtx);
// Early checks for performance reasons.
if (location.isUnknown()) {
return;
}
ExplodedNodeSet Src;
BldrTop.takeNodes(Pred);
StmtNodeBuilder Bldr(Pred, Src, *currBldrCtx);
if (Pred->getState() != state) {
// Associate this new state with an ExplodedNode.
// FIXME: If I pass null tag, the graph is incorrect, e.g for
// int *p;
// p = 0;
// *p = 0xDEADBEEF;
// "p = 0" is not noted as "Null pointer value stored to 'p'" but
// instead "int *p" is noted as
// "Variable 'p' initialized to a null pointer value"
static SimpleProgramPointTag tag(TagProviderName, "Location");
Bldr.generateNode(NodeEx, Pred, state, &tag);
}
ExplodedNodeSet Tmp;
getCheckerManager().runCheckersForLocation(Tmp, Src, location, isLoad,
NodeEx, BoundEx, *this);
BldrTop.addNodes(Tmp);
}
std::pair<const ProgramPointTag *, const ProgramPointTag*>
ExprEngine::geteagerlyAssumeBinOpBifurcationTags() {
static SimpleProgramPointTag
eagerlyAssumeBinOpBifurcationTrue(TagProviderName,
"Eagerly Assume True"),
eagerlyAssumeBinOpBifurcationFalse(TagProviderName,
"Eagerly Assume False");
return std::make_pair(&eagerlyAssumeBinOpBifurcationTrue,
&eagerlyAssumeBinOpBifurcationFalse);
}
void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst,
ExplodedNodeSet &Src,
const Expr *Ex) {
StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx);
for (const auto Pred : Src) {
// Test if the previous node was at the same expression. This can happen
// when the expression fails to evaluate to anything meaningful and
// (as an optimization) we don't generate a node.
ProgramPoint P = Pred->getLocation();
if (!P.getAs<PostStmt>() || P.castAs<PostStmt>().getStmt() != Ex) {
continue;
}
ProgramStateRef state = Pred->getState();
SVal V = state->getSVal(Ex, Pred->getLocationContext());
Optional<nonloc::SymbolVal> SEV = V.getAs<nonloc::SymbolVal>();
if (SEV && SEV->isExpression()) {
const std::pair<const ProgramPointTag *, const ProgramPointTag*> &tags =
geteagerlyAssumeBinOpBifurcationTags();
ProgramStateRef StateTrue, StateFalse;
std::tie(StateTrue, StateFalse) = state->assume(*SEV);
// First assume that the condition is true.
if (StateTrue) {
SVal Val = svalBuilder.makeIntVal(1U, Ex->getType());
StateTrue = StateTrue->BindExpr(Ex, Pred->getLocationContext(), Val);
Bldr.generateNode(Ex, Pred, StateTrue, tags.first);
}
// Next, assume that the condition is false.
if (StateFalse) {
SVal Val = svalBuilder.makeIntVal(0U, Ex->getType());
StateFalse = StateFalse->BindExpr(Ex, Pred->getLocationContext(), Val);
Bldr.generateNode(Ex, Pred, StateFalse, tags.second);
}
}
}
}
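// Example (illustrative): for `int b = (x == 0);` with a symbolic 'x', the
// comparison is bifurcated eagerly: one successor binds 'b' to 1 under the
// assumption x == 0, the other binds 'b' to 0 under x != 0.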
void ExprEngine::VisitGCCAsmStmt(const GCCAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
// We have processed both the inputs and the outputs. All of the outputs
// should evaluate to Locs. Nuke all of their values.
// FIXME: Some day in the future it would be nice to allow a "plug-in"
// which interprets the inline asm and stores proper results in the
// outputs.
ProgramStateRef state = Pred->getState();
for (const Expr *O : A->outputs()) {
SVal X = state->getSVal(O, Pred->getLocationContext());
assert(!isa<NonLoc>(X)); // Should be an lvalue, unknown, or undef.
if (Optional<Loc> LV = X.getAs<Loc>())
state = state->bindLoc(*LV, UnknownVal(), Pred->getLocationContext());
}
Bldr.generateNode(A, Pred, state);
}
void ExprEngine::VisitMSAsmStmt(const MSAsmStmt *A, ExplodedNode *Pred,
ExplodedNodeSet &Dst) {
StmtNodeBuilder Bldr(Pred, Dst, *currBldrCtx);
Bldr.generateNode(A, Pred, Pred->getState());
}
//===----------------------------------------------------------------------===//
// Visualization.
//===----------------------------------------------------------------------===//
namespace llvm {
template<>
struct DOTGraphTraits<ExplodedGraph*> : public DefaultDOTGraphTraits {
DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
static bool nodeHasBugReport(const ExplodedNode *N) {
BugReporter &BR = static_cast<ExprEngine &>(
N->getState()->getStateManager().getOwningEngine()).getBugReporter();
const auto EQClasses =
llvm::make_range(BR.EQClasses_begin(), BR.EQClasses_end());
for (const auto &EQ : EQClasses) {
for (const auto &I : EQ.getReports()) {
const auto *PR = dyn_cast<PathSensitiveBugReport>(I.get());
if (!PR)
continue;
const ExplodedNode *EN = PR->getErrorNode();
if (EN->getState() == N->getState() &&
EN->getLocation() == N->getLocation())
return true;
}
}
return false;
}
/// \p PreCallback: callback invoked on each node before the stop check.
/// \p PostCallback: callback invoked on each node that is traversed past.
/// \p Stop: stop iteration if it returns @c true.
/// \return Whether @c Stop ever returned @c true.
static bool traverseHiddenNodes(
const ExplodedNode *N,
llvm::function_ref<void(const ExplodedNode *)> PreCallback,
llvm::function_ref<void(const ExplodedNode *)> PostCallback,
llvm::function_ref<bool(const ExplodedNode *)> Stop) {
while (true) {
PreCallback(N);
if (Stop(N))
return true;
if (N->succ_size() != 1 || !isNodeHidden(N->getFirstSucc(), nullptr))
break;
PostCallback(N);
N = N->getFirstSucc();
}
return false;
}
static bool isNodeHidden(const ExplodedNode *N, const ExplodedGraph *G) {
return N->isTrivial();
}
static std::string getNodeLabel(const ExplodedNode *N, ExplodedGraph *G){
std::string Buf;
llvm::raw_string_ostream Out(Buf);
const bool IsDot = true;
const unsigned int Space = 1;
ProgramStateRef State = N->getState();
Out << "{ \"state_id\": " << State->getID()
<< ",\\l";
Indent(Out, Space, IsDot) << "\"program_points\": [\\l";
// Dump program point for all the previously skipped nodes.
traverseHiddenNodes(
N,
[&](const ExplodedNode *OtherNode) {
Indent(Out, Space + 1, IsDot) << "{ ";
OtherNode->getLocation().printJson(Out, /*NL=*/"\\l");
Out << ", \"tag\": ";
if (const ProgramPointTag *Tag = OtherNode->getLocation().getTag())
Out << '\"' << Tag->getTagDescription() << "\"";
else
Out << "null";
Out << ", \"node_id\": " << OtherNode->getID() <<
", \"is_sink\": " << OtherNode->isSink() <<
", \"has_report\": " << nodeHasBugReport(OtherNode) << " }";
},
// Adds a comma and a new-line between each program point.
[&](const ExplodedNode *) { Out << ",\\l"; },
[&](const ExplodedNode *) { return false; });
Out << "\\l"; // Adds a new-line to the last program point.
Indent(Out, Space, IsDot) << "],\\l";
State->printDOT(Out, N->getLocationContext(), Space);
Out << "\\l}\\l";
return Out.str();
}
};
} // namespace llvm
void ExprEngine::ViewGraph(bool trim) {
std::string Filename = DumpGraph(trim);
llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
}
void ExprEngine::ViewGraph(ArrayRef<const ExplodedNode *> Nodes) {
std::string Filename = DumpGraph(Nodes);
llvm::DisplayGraph(Filename, false, llvm::GraphProgram::DOT);
}
std::string ExprEngine::DumpGraph(bool trim, StringRef Filename) {
if (trim) {
std::vector<const ExplodedNode *> Src;
// Iterate through the reports and get their nodes.
for (BugReporter::EQClasses_iterator
EI = BR.EQClasses_begin(), EE = BR.EQClasses_end(); EI != EE; ++EI) {
const auto *R =
dyn_cast<PathSensitiveBugReport>(EI->getReports()[0].get());
if (!R)
continue;
const auto *N = const_cast<ExplodedNode *>(R->getErrorNode());
Src.push_back(N);
}
return DumpGraph(Src, Filename);
}
return llvm::WriteGraph(&G, "ExprEngine", /*ShortNames=*/false,
/*Title=*/"Exploded Graph",
/*Filename=*/std::string(Filename));
}
std::string ExprEngine::DumpGraph(ArrayRef<const ExplodedNode *> Nodes,
StringRef Filename) {
std::unique_ptr<ExplodedGraph> TrimmedG(G.trim(Nodes));
if (!TrimmedG.get()) {
llvm::errs() << "warning: Trimmed ExplodedGraph is empty.\n";
return "";
}
return llvm::WriteGraph(TrimmedG.get(), "TrimmedExprEngine",
/*ShortNames=*/false,
/*Title=*/"Trimmed Exploded Graph",
/*Filename=*/std::string(Filename));
}
void *ProgramStateTrait<ReplayWithoutInlining>::GDMIndex() {
static int index = 0;
return &index;
}
void ExprEngine::anchor() { }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 9af296b1853a..b29665a63390 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -1,10763 +1,10763 @@
//===-- sanitizer_common_interceptors.inc -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Common function interceptors for tools like AddressSanitizer,
// ThreadSanitizer, MemorySanitizer, etc.
//
// This file should be included into the tool's interceptor file,
// which has to define its own macros:
// COMMON_INTERCEPTOR_ENTER
// COMMON_INTERCEPTOR_ENTER_NOIGNORE
// COMMON_INTERCEPTOR_READ_RANGE
// COMMON_INTERCEPTOR_WRITE_RANGE
// COMMON_INTERCEPTOR_INITIALIZE_RANGE
// COMMON_INTERCEPTOR_DIR_ACQUIRE
// COMMON_INTERCEPTOR_FD_ACQUIRE
// COMMON_INTERCEPTOR_FD_RELEASE
// COMMON_INTERCEPTOR_FD_ACCESS
// COMMON_INTERCEPTOR_SET_THREAD_NAME
// COMMON_INTERCEPTOR_DLOPEN
// COMMON_INTERCEPTOR_ON_EXIT
// COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
// COMMON_INTERCEPTOR_MUTEX_POST_LOCK
// COMMON_INTERCEPTOR_MUTEX_UNLOCK
// COMMON_INTERCEPTOR_MUTEX_REPAIR
// COMMON_INTERCEPTOR_SET_PTHREAD_NAME
// COMMON_INTERCEPTOR_HANDLE_RECVMSG
// COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED
// COMMON_INTERCEPTOR_MEMSET_IMPL
// COMMON_INTERCEPTOR_MEMMOVE_IMPL
// COMMON_INTERCEPTOR_MEMCPY_IMPL
// COMMON_INTERCEPTOR_MMAP_IMPL
// COMMON_INTERCEPTOR_COPY_STRING
// COMMON_INTERCEPTOR_STRNDUP_IMPL
// COMMON_INTERCEPTOR_STRERROR
//===----------------------------------------------------------------------===//
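// Illustrative sketch (hypothetical; not taken from any particular tool): a
// client defines the required macros before including this file, e.g.
//   #define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
//     void *ctx = nullptr; (void)ctx;
//   #define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size) \
//     MyToolCheckRead(ptr, size)   // hypothetical tool hook
//   #define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size) \
//     MyToolCheckWrite(ptr, size)  // hypothetical tool hook
//   ...
//   #include "sanitizer_common_interceptors.inc"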
#include "interception/interception.h"
#include "sanitizer_addrhashmap.h"
#include "sanitizer_errno.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_platform_interceptors.h"
#include "sanitizer_symbolizer.h"
#include "sanitizer_tls_get_addr.h"
#include <stdarg.h>
#if SANITIZER_INTERCEPTOR_HOOKS
#define CALL_WEAK_INTERCEPTOR_HOOK(f, ...) f(__VA_ARGS__);
#define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...) \
SANITIZER_INTERFACE_WEAK_DEF(void, f, __VA_ARGS__) {}
#else
#define DECLARE_WEAK_INTERCEPTOR_HOOK(f, ...)
#define CALL_WEAK_INTERCEPTOR_HOOK(f, ...)
#endif // SANITIZER_INTERCEPTOR_HOOKS
#if SANITIZER_WINDOWS && !defined(va_copy)
#define va_copy(dst, src) ((dst) = (src))
#endif // _WIN32
#if SANITIZER_FREEBSD
#define pthread_setname_np pthread_set_name_np
#define inet_aton __inet_aton
#define inet_pton __inet_pton
#define iconv __bsd_iconv
#endif
#if SANITIZER_NETBSD
#define clock_getres __clock_getres50
#define clock_gettime __clock_gettime50
#define clock_settime __clock_settime50
#define ctime __ctime50
#define ctime_r __ctime_r50
#define devname __devname50
#define fgetpos __fgetpos50
#define fsetpos __fsetpos50
#define fstatvfs __fstatvfs90
#define fstatvfs1 __fstatvfs190
#define fts_children __fts_children60
#define fts_close __fts_close60
#define fts_open __fts_open60
#define fts_read __fts_read60
#define fts_set __fts_set60
#define getitimer __getitimer50
#define getmntinfo __getmntinfo90
#define getpwent __getpwent50
#define getpwnam __getpwnam50
#define getpwnam_r __getpwnam_r50
#define getpwuid __getpwuid50
#define getpwuid_r __getpwuid_r50
#define getutent __getutent50
#define getutxent __getutxent50
#define getutxid __getutxid50
#define getutxline __getutxline50
#define getvfsstat __getvfsstat90
#define pututxline __pututxline50
#define glob __glob30
#define gmtime __gmtime50
#define gmtime_r __gmtime_r50
#define localtime __localtime50
#define localtime_r __localtime_r50
#define mktime __mktime50
#define lstat __lstat50
#define opendir __opendir30
#define readdir __readdir30
#define readdir_r __readdir_r30
#define scandir __scandir30
#define setitimer __setitimer50
#define setlocale __setlocale50
#define shmctl __shmctl50
#define sigaltstack __sigaltstack14
#define sigemptyset __sigemptyset14
#define sigfillset __sigfillset14
#define sigpending __sigpending14
#define sigprocmask __sigprocmask14
#define sigtimedwait __sigtimedwait50
#define stat __stat50
#define statvfs __statvfs90
#define statvfs1 __statvfs190
#define time __time50
#define times __times13
#define unvis __unvis50
#define wait3 __wait350
#define wait4 __wait450
extern const unsigned short *_ctype_tab_;
extern const short *_toupper_tab_;
extern const short *_tolower_tab_;
#endif
#if SANITIZER_MUSL && \
(defined(__i386__) || defined(__arm__) || SANITIZER_MIPS32 || SANITIZER_PPC32)
// musl 1.2.0 on existing 32-bit architectures uses new symbol names for the
// time-related functions that take 64-bit time_t values. See
// https://musl.libc.org/time64.html
#define adjtime __adjtime64
#define adjtimex __adjtimex_time64
#define aio_suspend __aio_suspend_time64
#define clock_adjtime __clock_adjtime64
#define clock_getres __clock_getres_time64
#define clock_gettime __clock_gettime64
#define clock_nanosleep __clock_nanosleep_time64
#define clock_settime __clock_settime64
#define cnd_timedwait __cnd_timedwait_time64
#define ctime __ctime64
#define ctime_r __ctime64_r
#define difftime __difftime64
#define dlsym __dlsym_time64
#define fstatat __fstatat_time64
#define fstat __fstat_time64
#define ftime __ftime64
#define futimens __futimens_time64
#define futimesat __futimesat_time64
#define futimes __futimes_time64
#define getitimer __getitimer_time64
#define getrusage __getrusage_time64
#define gettimeofday __gettimeofday_time64
#define gmtime __gmtime64
#define gmtime_r __gmtime64_r
#define localtime __localtime64
#define localtime_r __localtime64_r
#define lstat __lstat_time64
#define lutimes __lutimes_time64
#define mktime __mktime64
#define mq_timedreceive __mq_timedreceive_time64
#define mq_timedsend __mq_timedsend_time64
#define mtx_timedlock __mtx_timedlock_time64
#define nanosleep __nanosleep_time64
#define ppoll __ppoll_time64
#define pselect __pselect_time64
#define pthread_cond_timedwait __pthread_cond_timedwait_time64
#define pthread_mutex_timedlock __pthread_mutex_timedlock_time64
#define pthread_rwlock_timedrdlock __pthread_rwlock_timedrdlock_time64
#define pthread_rwlock_timedwrlock __pthread_rwlock_timedwrlock_time64
#define pthread_timedjoin_np __pthread_timedjoin_np_time64
#define recvmmsg __recvmmsg_time64
#define sched_rr_get_interval __sched_rr_get_interval_time64
#define select __select_time64
#define semtimedop __semtimedop_time64
#define sem_timedwait __sem_timedwait_time64
#define setitimer __setitimer_time64
#define settimeofday __settimeofday_time64
#define sigtimedwait __sigtimedwait_time64
#define stat __stat_time64
#define stime __stime64
#define thrd_sleep __thrd_sleep_time64
#define timegm __timegm_time64
#define timerfd_gettime __timerfd_gettime64
#define timerfd_settime __timerfd_settime64
#define timer_gettime __timer_gettime64
#define timer_settime __timer_settime64
#define timespec_get __timespec_get_time64
#define time __time64
#define utimensat __utimensat_time64
#define utimes __utimes_time64
#define utime __utime64
#define wait3 __wait3_time64
#define wait4 __wait4_time64
#endif
// Platform-specific options.
#if SANITIZER_APPLE
#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 0
#elif SANITIZER_WINDOWS64
#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 0
#else
#define PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE 1
#endif // SANITIZER_APPLE
#ifndef COMMON_INTERCEPTOR_INITIALIZE_RANGE
#define COMMON_INTERCEPTOR_INITIALIZE_RANGE(p, size) {}
#endif
#ifndef COMMON_INTERCEPTOR_UNPOISON_PARAM
#define COMMON_INTERCEPTOR_UNPOISON_PARAM(count) {}
#endif
#ifndef COMMON_INTERCEPTOR_FD_ACCESS
#define COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_PRE_LOCK
#define COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_POST_LOCK
#define COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_UNLOCK
#define COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_REPAIR
#define COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_MUTEX_INVALID
#define COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m) {}
#endif
#ifndef COMMON_INTERCEPTOR_HANDLE_RECVMSG
#define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) ((void)(msg))
#endif
#ifndef COMMON_INTERCEPTOR_FILE_OPEN
#define COMMON_INTERCEPTOR_FILE_OPEN(ctx, file, path) {}
#endif
#ifndef COMMON_INTERCEPTOR_FILE_CLOSE
#define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file) {}
#endif
#ifndef COMMON_INTERCEPTOR_LIBRARY_LOADED
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) {}
#endif
#ifndef COMMON_INTERCEPTOR_LIBRARY_UNLOADED
#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() {}
#endif
#ifndef COMMON_INTERCEPTOR_ENTER_NOIGNORE
#define COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, ...) \
COMMON_INTERCEPTOR_ENTER(ctx, __VA_ARGS__)
#endif
#ifndef COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED
#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED (0)
#endif
#define COMMON_INTERCEPTOR_READ_STRING(ctx, s, n) \
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s), \
common_flags()->strict_string_checks ? (internal_strlen(s)) + 1 : (n) )
#ifndef COMMON_INTERCEPTOR_DLOPEN
#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
({ CheckNoDeepBind(filename, flag); REAL(dlopen)(filename, flag); })
#endif
#ifndef COMMON_INTERCEPTOR_GET_TLS_RANGE
#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end) *begin = *end = 0;
#endif
#ifndef COMMON_INTERCEPTOR_ACQUIRE
#define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) {}
#endif
#ifndef COMMON_INTERCEPTOR_RELEASE
#define COMMON_INTERCEPTOR_RELEASE(ctx, u) {}
#endif
#ifndef COMMON_INTERCEPTOR_USER_CALLBACK_START
#define COMMON_INTERCEPTOR_USER_CALLBACK_START() {}
#endif
#ifndef COMMON_INTERCEPTOR_USER_CALLBACK_END
#define COMMON_INTERCEPTOR_USER_CALLBACK_END() {}
#endif
#ifdef SANITIZER_NLDBL_VERSION
#define COMMON_INTERCEPT_FUNCTION_LDBL(fn) \
COMMON_INTERCEPT_FUNCTION_VER(fn, SANITIZER_NLDBL_VERSION)
#else
#define COMMON_INTERCEPT_FUNCTION_LDBL(fn) \
COMMON_INTERCEPT_FUNCTION(fn)
#endif
#if SANITIZER_GLIBC
// If we could not find the versioned symbol, fall back to an unversioned
// lookup. This is needed to work around a GLibc bug that causes dlsym
// with RTLD_NEXT to return the oldest versioned symbol.
// See https://sourceware.org/bugzilla/show_bug.cgi?id=14932.
// For certain symbols (e.g. regexec) we have to perform a versioned lookup,
// but that versioned symbol will only exist for architectures where the
// oldest Glibc version pre-dates support for that architecture.
// For example, regexec@GLIBC_2.3.4 exists on x86_64, but not RISC-V.
// See also https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98920.
#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(fn, ver)
#else
#define COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(fn, ver) \
COMMON_INTERCEPT_FUNCTION(fn)
#endif
#ifndef COMMON_INTERCEPTOR_MEMSET_IMPL
#define COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size) \
{ \
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) \
return internal_memset(dst, v, size); \
COMMON_INTERCEPTOR_ENTER(ctx, memset, dst, v, size); \
if (common_flags()->intercept_intrin) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
return REAL(memset)(dst, v, size); \
}
#endif
#ifndef COMMON_INTERCEPTOR_MEMMOVE_IMPL
#define COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size) \
{ \
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) \
return internal_memmove(dst, src, size); \
COMMON_INTERCEPTOR_ENTER(ctx, memmove, dst, src, size); \
if (common_flags()->intercept_intrin) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size); \
} \
return REAL(memmove)(dst, src, size); \
}
#endif
#ifndef COMMON_INTERCEPTOR_MEMCPY_IMPL
#define COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size) \
{ \
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED) { \
return internal_memmove(dst, src, size); \
} \
COMMON_INTERCEPTOR_ENTER(ctx, memcpy, dst, src, size); \
if (common_flags()->intercept_intrin) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, size); \
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, size); \
} \
return REAL(memcpy)(dst, src, size); \
}
#endif
#ifndef COMMON_INTERCEPTOR_MMAP_IMPL
#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd, \
off) \
{ return REAL(mmap)(addr, sz, prot, flags, fd, off); }
#endif
#ifndef COMMON_INTERCEPTOR_COPY_STRING
#define COMMON_INTERCEPTOR_COPY_STRING(ctx, to, from, size) {}
#endif
#ifndef COMMON_INTERCEPTOR_STRNDUP_IMPL
#define COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size) \
COMMON_INTERCEPTOR_ENTER(ctx, strndup, s, size); \
uptr copy_length = internal_strnlen(s, size); \
char *new_mem = (char *)WRAP(malloc)(copy_length + 1); \
if (common_flags()->intercept_strndup) { \
COMMON_INTERCEPTOR_READ_STRING(ctx, s, Min(size, copy_length + 1)); \
} \
if (new_mem) { \
COMMON_INTERCEPTOR_COPY_STRING(ctx, new_mem, s, copy_length); \
internal_memcpy(new_mem, s, copy_length); \
new_mem[copy_length] = '\0'; \
} \
return new_mem;
#endif
#ifndef COMMON_INTERCEPTOR_STRERROR
#define COMMON_INTERCEPTOR_STRERROR() {}
#endif
struct FileMetadata {
// For open_memstream().
char **addr;
SIZE_T *size;
};
struct CommonInterceptorMetadata {
enum {
CIMT_INVALID = 0,
CIMT_FILE
} type;
union {
FileMetadata file;
};
};
#if SI_POSIX
typedef AddrHashMap<CommonInterceptorMetadata, 31051> MetadataHashMap;
static MetadataHashMap *interceptor_metadata_map;
UNUSED static void SetInterceptorMetadata(__sanitizer_FILE *addr,
const FileMetadata &file) {
MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr);
CHECK(h.created());
h->type = CommonInterceptorMetadata::CIMT_FILE;
h->file = file;
}
UNUSED static const FileMetadata *GetInterceptorMetadata(
__sanitizer_FILE *addr) {
MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr,
/* remove */ false,
/* create */ false);
if (addr && h.exists()) {
CHECK(!h.created());
CHECK(h->type == CommonInterceptorMetadata::CIMT_FILE);
return &h->file;
} else {
return 0;
}
}
UNUSED static void DeleteInterceptorMetadata(void *addr) {
MetadataHashMap::Handle h(interceptor_metadata_map, (uptr)addr, true);
CHECK(h.exists());
}
#endif // SI_POSIX
#if SANITIZER_INTERCEPT_STRLEN
INTERCEPTOR(SIZE_T, strlen, const char *s) {
// Sometimes strlen is called prior to InitializeCommonInterceptors,
// in which case the REAL(strlen) typically used in
// COMMON_INTERCEPTOR_ENTER will fail. We use internal_strlen here
// to handle that.
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strlen(s);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strlen, s);
SIZE_T result = REAL(strlen)(s);
if (common_flags()->intercept_strlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, result + 1);
return result;
}
#define INIT_STRLEN COMMON_INTERCEPT_FUNCTION(strlen)
#else
#define INIT_STRLEN
#endif
#if SANITIZER_INTERCEPT_STRNLEN
INTERCEPTOR(SIZE_T, strnlen, const char *s, SIZE_T maxlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnlen, s, maxlen);
SIZE_T length = REAL(strnlen)(s, maxlen);
if (common_flags()->intercept_strlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, Min(length + 1, maxlen));
return length;
}
#define INIT_STRNLEN COMMON_INTERCEPT_FUNCTION(strnlen)
#else
#define INIT_STRNLEN
#endif
#if SANITIZER_INTERCEPT_STRNDUP
INTERCEPTOR(char*, strndup, const char *s, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
}
#define INIT_STRNDUP COMMON_INTERCEPT_FUNCTION(strndup)
#else
#define INIT_STRNDUP
#endif // SANITIZER_INTERCEPT_STRNDUP
#if SANITIZER_INTERCEPT___STRNDUP
INTERCEPTOR(char*, __strndup, const char *s, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_STRNDUP_IMPL(ctx, s, size);
}
#define INIT___STRNDUP COMMON_INTERCEPT_FUNCTION(__strndup)
#else
#define INIT___STRNDUP
#endif // SANITIZER_INTERCEPT___STRNDUP
#if SANITIZER_INTERCEPT_TEXTDOMAIN
INTERCEPTOR(char*, textdomain, const char *domainname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, textdomain, domainname);
if (domainname) COMMON_INTERCEPTOR_READ_STRING(ctx, domainname, 0);
char *domain = REAL(textdomain)(domainname);
if (domain) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(domain, internal_strlen(domain) + 1);
}
return domain;
}
#define INIT_TEXTDOMAIN COMMON_INTERCEPT_FUNCTION(textdomain)
#else
#define INIT_TEXTDOMAIN
#endif
#if SANITIZER_INTERCEPT_STRCMP
static inline int CharCmpX(unsigned char c1, unsigned char c2) {
return (c1 == c2) ? 0 : (c1 < c2) ? -1 : 1;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcmp, uptr called_pc,
const char *s1, const char *s2, int result)
INTERCEPTOR(int, strcmp, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcmp, s1, s2);
unsigned char c1, c2;
uptr i;
for (i = 0;; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (c1 != c2 || c1 == '\0') break;
}
if (common_flags()->intercept_strcmp) {
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, i + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s2, i + 1);
}
int result = CharCmpX(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcmp, GET_CALLER_PC(), s1,
s2, result);
return result;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncmp, uptr called_pc,
const char *s1, const char *s2, uptr n,
int result)
INTERCEPTOR(int, strncmp, const char *s1, const char *s2, uptr size) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strncmp(s1, s2, size);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strncmp, s1, s2, size);
unsigned char c1 = 0, c2 = 0;
uptr i;
for (i = 0; i < size; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (c1 != c2 || c1 == '\0') break;
}
uptr i1 = i;
uptr i2 = i;
if (common_flags()->strict_string_checks) {
for (; i1 < size && s1[i1]; i1++) {}
for (; i2 < size && s2[i2]; i2++) {}
}
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s1), Min(i1 + 1, size));
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s2), Min(i2 + 1, size));
int result = CharCmpX(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncmp, GET_CALLER_PC(), s1,
s2, size, result);
return result;
}
#define INIT_STRCMP COMMON_INTERCEPT_FUNCTION(strcmp)
#define INIT_STRNCMP COMMON_INTERCEPT_FUNCTION(strncmp)
#else
#define INIT_STRCMP
#define INIT_STRNCMP
#endif
#if SANITIZER_INTERCEPT_STRCASECMP
static inline int CharCaseCmp(unsigned char c1, unsigned char c2) {
int c1_low = ToLower(c1);
int c2_low = ToLower(c2);
return c1_low - c2_low;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasecmp, uptr called_pc,
const char *s1, const char *s2, int result)
INTERCEPTOR(int, strcasecmp, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcasecmp, s1, s2);
unsigned char c1 = 0, c2 = 0;
uptr i;
for (i = 0;; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (CharCaseCmp(c1, c2) != 0 || c1 == '\0') break;
}
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, i + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s2, i + 1);
int result = CharCaseCmp(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasecmp, GET_CALLER_PC(),
s1, s2, result);
return result;
}
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, uptr called_pc,
const char *s1, const char *s2, uptr size,
int result)
INTERCEPTOR(int, strncasecmp, const char *s1, const char *s2, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strncasecmp, s1, s2, size);
unsigned char c1 = 0, c2 = 0;
uptr i;
for (i = 0; i < size; i++) {
c1 = (unsigned char)s1[i];
c2 = (unsigned char)s2[i];
if (CharCaseCmp(c1, c2) != 0 || c1 == '\0') break;
}
uptr i1 = i;
uptr i2 = i;
if (common_flags()->strict_string_checks) {
for (; i1 < size && s1[i1]; i1++) {}
for (; i2 < size && s2[i2]; i2++) {}
}
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s1), Min(i1 + 1, size));
COMMON_INTERCEPTOR_READ_RANGE((ctx), (s2), Min(i2 + 1, size));
int result = CharCaseCmp(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strncasecmp, GET_CALLER_PC(),
s1, s2, size, result);
return result;
}
#define INIT_STRCASECMP COMMON_INTERCEPT_FUNCTION(strcasecmp)
#define INIT_STRNCASECMP COMMON_INTERCEPT_FUNCTION(strncasecmp)
#else
#define INIT_STRCASECMP
#define INIT_STRNCASECMP
#endif
#if SANITIZER_INTERCEPT_STRSTR || SANITIZER_INTERCEPT_STRCASESTR
static inline void StrstrCheck(void *ctx, char *r, const char *s1,
const char *s2) {
uptr len1 = internal_strlen(s1);
uptr len2 = internal_strlen(s2);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r ? r - s1 + len2 : len1 + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2 + 1);
}
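// (Illustrative) When r is non-null at offset k = r - s1, StrstrCheck reports
// a read of k + len2 bytes of s1 (up to and including the match); when r is
// null, all of s1 including its NUL terminator was scanned.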
#endif
#if SANITIZER_INTERCEPT_STRSTR
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strstr, uptr called_pc,
const char *s1, const char *s2, char *result)
INTERCEPTOR(char*, strstr, const char *s1, const char *s2) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strstr(s1, s2);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strstr, s1, s2);
char *r = REAL(strstr)(s1, s2);
if (common_flags()->intercept_strstr)
StrstrCheck(ctx, r, s1, s2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strstr, GET_CALLER_PC(), s1,
s2, r);
return r;
}
#define INIT_STRSTR COMMON_INTERCEPT_FUNCTION(strstr);
#else
#define INIT_STRSTR
#endif
#if SANITIZER_INTERCEPT_STRCASESTR
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasestr, uptr called_pc,
const char *s1, const char *s2, char *result)
INTERCEPTOR(char*, strcasestr, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcasestr, s1, s2);
char *r = REAL(strcasestr)(s1, s2);
if (common_flags()->intercept_strstr)
StrstrCheck(ctx, r, s1, s2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_strcasestr, GET_CALLER_PC(),
s1, s2, r);
return r;
}
#define INIT_STRCASESTR COMMON_INTERCEPT_FUNCTION(strcasestr);
#else
#define INIT_STRCASESTR
#endif
#if SANITIZER_INTERCEPT_STRTOK
INTERCEPTOR(char*, strtok, char *str, const char *delimiters) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtok, str, delimiters);
if (!common_flags()->intercept_strtok) {
return REAL(strtok)(str, delimiters);
}
if (common_flags()->strict_string_checks) {
// If strict_string_checks is enabled, we check the whole first argument
// string on the first call (strtok saves this string in a static buffer
// for subsequent calls). We do not need to check strtok's result.
// As the delimiters can change, we check them every call.
if (str != nullptr) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters,
internal_strlen(delimiters) + 1);
return REAL(strtok)(str, delimiters);
} else {
// However, when strict_string_checks is disabled we cannot check the
// whole string on the first call. Instead, we check the result string
// which is guaranteed to be a NULL-terminated substring of the first
// argument. We also conservatively check one character of str and the
// delimiters.
if (str != nullptr) {
COMMON_INTERCEPTOR_READ_STRING(ctx, str, 1);
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, delimiters, 1);
char *result = REAL(strtok)(str, delimiters);
if (result != nullptr) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, result, internal_strlen(result) + 1);
} else if (str != nullptr) {
// No delimiters were found, so it's safe to assume that the entire str
// was scanned.
COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
}
return result;
}
}
#define INIT_STRTOK COMMON_INTERCEPT_FUNCTION(strtok)
#else
#define INIT_STRTOK
#endif
#if SANITIZER_INTERCEPT_MEMMEM
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, uptr called_pc,
const void *s1, SIZE_T len1, const void *s2,
SIZE_T len2, void *result)
INTERCEPTOR(void*, memmem, const void *s1, SIZE_T len1, const void *s2,
SIZE_T len2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memmem, s1, len1, s2, len2);
void *r = REAL(memmem)(s1, len1, s2, len2);
if (common_flags()->intercept_memmem) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, len1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, len2);
}
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memmem, GET_CALLER_PC(),
s1, len1, s2, len2, r);
return r;
}
#define INIT_MEMMEM COMMON_INTERCEPT_FUNCTION(memmem);
#else
#define INIT_MEMMEM
#endif // SANITIZER_INTERCEPT_MEMMEM
#if SANITIZER_INTERCEPT_STRCHR
INTERCEPTOR(char*, strchr, const char *s, int c) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strchr(s, c);
COMMON_INTERCEPTOR_ENTER(ctx, strchr, s, c);
char *result = REAL(strchr)(s, c);
if (common_flags()->intercept_strchr) {
    // Keep strlen as a macro argument, as the macro may ignore it.
COMMON_INTERCEPTOR_READ_STRING(ctx, s,
(result ? result - s : internal_strlen(s)) + 1);
}
return result;
}
#define INIT_STRCHR COMMON_INTERCEPT_FUNCTION(strchr)
#else
#define INIT_STRCHR
#endif
#if SANITIZER_INTERCEPT_STRCHRNUL
INTERCEPTOR(char*, strchrnul, const char *s, int c) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strchrnul, s, c);
char *result = REAL(strchrnul)(s, c);
uptr len = result - s + 1;
if (common_flags()->intercept_strchr)
COMMON_INTERCEPTOR_READ_STRING(ctx, s, len);
return result;
}
#define INIT_STRCHRNUL COMMON_INTERCEPT_FUNCTION(strchrnul)
#else
#define INIT_STRCHRNUL
#endif
#if SANITIZER_INTERCEPT_STRRCHR
INTERCEPTOR(char*, strrchr, const char *s, int c) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_strrchr(s, c);
COMMON_INTERCEPTOR_ENTER(ctx, strrchr, s, c);
if (common_flags()->intercept_strchr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
return REAL(strrchr)(s, c);
}
#define INIT_STRRCHR COMMON_INTERCEPT_FUNCTION(strrchr)
#else
#define INIT_STRRCHR
#endif
#if SANITIZER_INTERCEPT_STRSPN
INTERCEPTOR(SIZE_T, strspn, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strspn, s1, s2);
SIZE_T r = REAL(strspn)(s1, s2);
if (common_flags()->intercept_strspn) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r + 1);
}
return r;
}
INTERCEPTOR(SIZE_T, strcspn, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strcspn, s1, s2);
SIZE_T r = REAL(strcspn)(s1, s2);
if (common_flags()->intercept_strspn) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1, r + 1);
}
return r;
}
#define INIT_STRSPN \
COMMON_INTERCEPT_FUNCTION(strspn); \
COMMON_INTERCEPT_FUNCTION(strcspn);
#else
#define INIT_STRSPN
#endif
#if SANITIZER_INTERCEPT_STRPBRK
INTERCEPTOR(char *, strpbrk, const char *s1, const char *s2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strpbrk, s1, s2);
char *r = REAL(strpbrk)(s1, s2);
if (common_flags()->intercept_strpbrk) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, internal_strlen(s2) + 1);
COMMON_INTERCEPTOR_READ_STRING(ctx, s1,
r ? r - s1 + 1 : internal_strlen(s1) + 1);
}
return r;
}
#define INIT_STRPBRK COMMON_INTERCEPT_FUNCTION(strpbrk);
#else
#define INIT_STRPBRK
#endif
#if SANITIZER_INTERCEPT_MEMSET
INTERCEPTOR(void *, memset, void *dst, int v, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, dst, v, size);
}
#define INIT_MEMSET COMMON_INTERCEPT_FUNCTION(memset)
#else
#define INIT_MEMSET
#endif
#if SANITIZER_INTERCEPT_MEMMOVE
INTERCEPTOR(void *, memmove, void *dst, const void *src, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
}
#define INIT_MEMMOVE COMMON_INTERCEPT_FUNCTION(memmove)
#else
#define INIT_MEMMOVE
#endif
#if SANITIZER_INTERCEPT_MEMCPY
INTERCEPTOR(void *, memcpy, void *dst, const void *src, uptr size) {
  // On OS X, memcpy and memmove are aliases of the same implementation, so
  // calling internal_memcpy here would cause memory corruption; if this is
  // ever switched to an internal_ routine, it has to be internal_memmove.
void *ctx;
#if PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, dst, src, size);
#else
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, dst, src, size);
#endif
}
#define INIT_MEMCPY \
do { \
if (PLATFORM_HAS_DIFFERENT_MEMCPY_AND_MEMMOVE) { \
COMMON_INTERCEPT_FUNCTION(memcpy); \
} else { \
ASSIGN_REAL(memcpy, memmove); \
} \
CHECK(REAL(memcpy)); \
} while (false)
#else
#define INIT_MEMCPY
#endif
#if SANITIZER_INTERCEPT_MEMCMP
DECLARE_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, uptr called_pc,
const void *s1, const void *s2, uptr n,
int result)
// Common code for `memcmp` and `bcmp`.
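// With strict_memcmp the whole buffers are checked up front; otherwise the
// comparison is replayed byte-by-byte here so that only the bytes actually
// examined (up to and including the first mismatch) are reported as read.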
int MemcmpInterceptorCommon(void *ctx,
int (*real_fn)(const void *, const void *, uptr),
const void *a1, const void *a2, uptr size) {
if (common_flags()->intercept_memcmp) {
if (common_flags()->strict_memcmp) {
// Check the entire regions even if the first bytes of the buffers are
// different.
COMMON_INTERCEPTOR_READ_RANGE(ctx, a1, size);
COMMON_INTERCEPTOR_READ_RANGE(ctx, a2, size);
// Fallthrough to REAL(memcmp) below.
} else {
unsigned char c1 = 0, c2 = 0;
const unsigned char *s1 = (const unsigned char*)a1;
const unsigned char *s2 = (const unsigned char*)a2;
uptr i;
for (i = 0; i < size; i++) {
c1 = s1[i];
c2 = s2[i];
if (c1 != c2) break;
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, s1, Min(i + 1, size));
COMMON_INTERCEPTOR_READ_RANGE(ctx, s2, Min(i + 1, size));
int r = CharCmpX(c1, c2);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, GET_CALLER_PC(),
a1, a2, size, r);
return r;
}
}
int result = real_fn(a1, a2, size);
CALL_WEAK_INTERCEPTOR_HOOK(__sanitizer_weak_hook_memcmp, GET_CALLER_PC(), a1,
a2, size, result);
return result;
}
INTERCEPTOR(int, memcmp, const void *a1, const void *a2, uptr size) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_memcmp(a1, a2, size);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memcmp, a1, a2, size);
return MemcmpInterceptorCommon(ctx, REAL(memcmp), a1, a2, size);
}
#define INIT_MEMCMP COMMON_INTERCEPT_FUNCTION(memcmp)
#else
#define INIT_MEMCMP
#endif
#if SANITIZER_INTERCEPT_BCMP
INTERCEPTOR(int, bcmp, const void *a1, const void *a2, uptr size) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_memcmp(a1, a2, size);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, bcmp, a1, a2, size);
return MemcmpInterceptorCommon(ctx, REAL(bcmp), a1, a2, size);
}
#define INIT_BCMP COMMON_INTERCEPT_FUNCTION(bcmp)
#else
#define INIT_BCMP
#endif
#if SANITIZER_INTERCEPT_MEMCHR
INTERCEPTOR(void*, memchr, const void *s, int c, SIZE_T n) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_memchr(s, c, n);
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memchr, s, c, n);
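  // On Windows, REAL(memchr) may not have been resolved yet; fall back to the
  // internal implementation in that case.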
#if SANITIZER_WINDOWS
void *res;
if (REAL(memchr)) {
res = REAL(memchr)(s, c, n);
} else {
res = internal_memchr(s, c, n);
}
#else
void *res = REAL(memchr)(s, c, n);
#endif
uptr len = res ? (char *)res - (const char *)s + 1 : n;
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, len);
return res;
}
#define INIT_MEMCHR COMMON_INTERCEPT_FUNCTION(memchr)
#else
#define INIT_MEMCHR
#endif
#if SANITIZER_INTERCEPT_MEMRCHR
INTERCEPTOR(void*, memrchr, const void *s, int c, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, memrchr, s, c, n);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, n);
return REAL(memrchr)(s, c, n);
}
#define INIT_MEMRCHR COMMON_INTERCEPT_FUNCTION(memrchr)
#else
#define INIT_MEMRCHR
#endif
#if SANITIZER_INTERCEPT_FREXP
INTERCEPTOR(double, frexp, double x, int *exp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, frexp, x, exp);
// Assuming frexp() always writes to |exp|.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
double res = REAL(frexp)(x, exp);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(exp, sizeof(*exp));
return res;
}
#define INIT_FREXP COMMON_INTERCEPT_FUNCTION(frexp);
#else
#define INIT_FREXP
#endif // SANITIZER_INTERCEPT_FREXP
#if SANITIZER_INTERCEPT_FREXPF_FREXPL
INTERCEPTOR(float, frexpf, float x, int *exp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, frexpf, x, exp);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
float res = REAL(frexpf)(x, exp);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(exp, sizeof(*exp));
return res;
}
INTERCEPTOR(long double, frexpl, long double x, int *exp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, frexpl, x, exp);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, exp, sizeof(*exp));
long double res = REAL(frexpl)(x, exp);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(exp, sizeof(*exp));
return res;
}
#define INIT_FREXPF_FREXPL \
COMMON_INTERCEPT_FUNCTION(frexpf); \
COMMON_INTERCEPT_FUNCTION_LDBL(frexpl)
#else
#define INIT_FREXPF_FREXPL
#endif // SANITIZER_INTERCEPT_FREXPF_FREXPL
#if SI_POSIX
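// Helpers for the iovec-based interceptors below: walk the iovec array and
// mark each buffer as written (write_iovec) or read (read_iovec), limited to
// maxlen bytes in total, i.e. the number of bytes actually transferred.
// read_iovec additionally checks that the iovec array itself is readable.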
static void write_iovec(void *ctx, struct __sanitizer_iovec *iovec,
SIZE_T iovlen, SIZE_T maxlen) {
for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
SSIZE_T sz = Min(iovec[i].iov_len, maxlen);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iovec[i].iov_base, sz);
maxlen -= sz;
}
}
static void read_iovec(void *ctx, struct __sanitizer_iovec *iovec,
SIZE_T iovlen, SIZE_T maxlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec, sizeof(*iovec) * iovlen);
for (SIZE_T i = 0; i < iovlen && maxlen; ++i) {
SSIZE_T sz = Min(iovec[i].iov_len, maxlen);
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec[i].iov_base, sz);
maxlen -= sz;
}
}
#endif
#if SANITIZER_INTERCEPT_READ
INTERCEPTOR(SSIZE_T, read, int fd, void *ptr, SIZE_T count) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, read, fd, ptr, count);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(read)(fd, ptr, count);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_READ COMMON_INTERCEPT_FUNCTION(read)
#else
#define INIT_READ
#endif
#if SANITIZER_INTERCEPT_FREAD
INTERCEPTOR(SIZE_T, fread, void *ptr, SIZE_T size, SIZE_T nmemb, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fread, ptr, size, nmemb, file);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(fread)(ptr, size, nmemb, file);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res * size);
return res;
}
#define INIT_FREAD COMMON_INTERCEPT_FUNCTION(fread)
#else
#define INIT_FREAD
#endif
#if SANITIZER_INTERCEPT_PREAD
INTERCEPTOR(SSIZE_T, pread, int fd, void *ptr, SIZE_T count, OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pread, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(pread)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREAD COMMON_INTERCEPT_FUNCTION(pread)
#else
#define INIT_PREAD
#endif
#if SANITIZER_INTERCEPT_PREAD64
INTERCEPTOR(SSIZE_T, pread64, int fd, void *ptr, SIZE_T count, OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pread64, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(pread64)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREAD64 COMMON_INTERCEPT_FUNCTION(pread64)
#else
#define INIT_PREAD64
#endif
#if SANITIZER_INTERCEPT_READV
INTERCEPTOR_WITH_SUFFIX(SSIZE_T, readv, int fd, __sanitizer_iovec *iov,
int iovcnt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readv, fd, iov, iovcnt);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(readv)(fd, iov, iovcnt);
if (res > 0) write_iovec(ctx, iov, iovcnt, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_READV COMMON_INTERCEPT_FUNCTION(readv)
#else
#define INIT_READV
#endif
#if SANITIZER_INTERCEPT_PREADV
INTERCEPTOR(SSIZE_T, preadv, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, preadv, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(preadv)(fd, iov, iovcnt, offset);
if (res > 0) write_iovec(ctx, iov, iovcnt, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREADV COMMON_INTERCEPT_FUNCTION(preadv)
#else
#define INIT_PREADV
#endif
#if SANITIZER_INTERCEPT_PREADV64
INTERCEPTOR(SSIZE_T, preadv64, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, preadv64, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(preadv64)(fd, iov, iovcnt, offset);
if (res > 0) write_iovec(ctx, iov, iovcnt, res);
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
#define INIT_PREADV64 COMMON_INTERCEPT_FUNCTION(preadv64)
#else
#define INIT_PREADV64
#endif
#if SANITIZER_INTERCEPT_WRITE
INTERCEPTOR(SSIZE_T, write, int fd, void *ptr, SIZE_T count) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, write, fd, ptr, count);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(write)(fd, ptr, count);
// FIXME: this check should be _before_ the call to REAL(write), not after
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
return res;
}
#define INIT_WRITE COMMON_INTERCEPT_FUNCTION(write)
#else
#define INIT_WRITE
#endif
#if SANITIZER_INTERCEPT_FWRITE
INTERCEPTOR(SIZE_T, fwrite, const void *p, uptr size, uptr nmemb, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fwrite, p, size, nmemb, file);
SIZE_T res = REAL(fwrite)(p, size, nmemb, file);
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, p, res * size);
return res;
}
#define INIT_FWRITE COMMON_INTERCEPT_FUNCTION(fwrite)
#else
#define INIT_FWRITE
#endif
#if SANITIZER_INTERCEPT_PWRITE
INTERCEPTOR(SSIZE_T, pwrite, int fd, void *ptr, SIZE_T count, OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwrite, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwrite)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
return res;
}
#define INIT_PWRITE COMMON_INTERCEPT_FUNCTION(pwrite)
#else
#define INIT_PWRITE
#endif
#if SANITIZER_INTERCEPT_PWRITE64
INTERCEPTOR(SSIZE_T, pwrite64, int fd, void *ptr, OFF64_T count,
OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwrite64, fd, ptr, count, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwrite64)(fd, ptr, count, offset);
if (res > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, res);
return res;
}
#define INIT_PWRITE64 COMMON_INTERCEPT_FUNCTION(pwrite64)
#else
#define INIT_PWRITE64
#endif
#if SANITIZER_INTERCEPT_WRITEV
INTERCEPTOR_WITH_SUFFIX(SSIZE_T, writev, int fd, __sanitizer_iovec *iov,
int iovcnt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, writev, fd, iov, iovcnt);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(writev)(fd, iov, iovcnt);
if (res > 0) read_iovec(ctx, iov, iovcnt, res);
return res;
}
#define INIT_WRITEV COMMON_INTERCEPT_FUNCTION(writev)
#else
#define INIT_WRITEV
#endif
#if SANITIZER_INTERCEPT_PWRITEV
INTERCEPTOR(SSIZE_T, pwritev, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwritev, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwritev)(fd, iov, iovcnt, offset);
if (res > 0) read_iovec(ctx, iov, iovcnt, res);
return res;
}
#define INIT_PWRITEV COMMON_INTERCEPT_FUNCTION(pwritev)
#else
#define INIT_PWRITEV
#endif
#if SANITIZER_INTERCEPT_PWRITEV64
INTERCEPTOR(SSIZE_T, pwritev64, int fd, __sanitizer_iovec *iov, int iovcnt,
OFF64_T offset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pwritev64, fd, iov, iovcnt, offset);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
if (fd >= 0) COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
SSIZE_T res = REAL(pwritev64)(fd, iov, iovcnt, offset);
if (res > 0) read_iovec(ctx, iov, iovcnt, res);
return res;
}
#define INIT_PWRITEV64 COMMON_INTERCEPT_FUNCTION(pwritev64)
#else
#define INIT_PWRITEV64
#endif
#if SANITIZER_INTERCEPT_FGETS
INTERCEPTOR(char *, fgets, char *s, SIZE_T size, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgets, s, size, file);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(fgets)(s, size, file);
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
return res;
}
#define INIT_FGETS COMMON_INTERCEPT_FUNCTION(fgets)
#else
#define INIT_FGETS
#endif
#if SANITIZER_INTERCEPT_FPUTS
INTERCEPTOR_WITH_SUFFIX(int, fputs, char *s, void *file) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fputs, s, file);
if (!SANITIZER_APPLE || s) { // `fputs(NULL, file)` is supported on Darwin.
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
}
return REAL(fputs)(s, file);
}
#define INIT_FPUTS COMMON_INTERCEPT_FUNCTION(fputs)
#else
#define INIT_FPUTS
#endif
#if SANITIZER_INTERCEPT_PUTS
INTERCEPTOR(int, puts, char *s) {
// libc file streams can call user-supplied functions, see fopencookie.
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, puts, s);
if (!SANITIZER_APPLE || s) { // `puts(NULL)` is supported on Darwin.
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
}
return REAL(puts)(s);
}
#define INIT_PUTS COMMON_INTERCEPT_FUNCTION(puts)
#else
#define INIT_PUTS
#endif
#if SANITIZER_INTERCEPT_PRCTL
INTERCEPTOR(int, prctl, int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, prctl, option, arg2, arg3, arg4, arg5);
static const int PR_SET_NAME = 15;
static const int PR_SET_VMA = 0x53564d41;
static const int PR_SCHED_CORE = 62;
static const int PR_SCHED_CORE_GET = 0;
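  // For PR_SET_VMA with arg2 == 0 (PR_SET_VMA_ANON_NAME on Linux), arg5
  // points to a name string; check that it is readable before the real call.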
if (option == PR_SET_VMA && arg2 == 0UL) {
char *name = (char *)arg5;
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
}
  int res = REAL(prctl)(option, arg2, arg3, arg4, arg5);
if (option == PR_SET_NAME) {
char buff[16];
internal_strncpy(buff, (char *)arg2, 15);
buff[15] = 0;
COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, buff);
} else if (res != -1 && option == PR_SCHED_CORE && arg2 == PR_SCHED_CORE_GET) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (u64*)(arg5), sizeof(u64));
}
return res;
}
#define INIT_PRCTL COMMON_INTERCEPT_FUNCTION(prctl)
#else
#define INIT_PRCTL
#endif // SANITIZER_INTERCEPT_PRCTL
#if SANITIZER_INTERCEPT_TIME
INTERCEPTOR(unsigned long, time, unsigned long *t) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, time, t);
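  // Call the real function with a local variable so that |t| is written (and
  // reported as written) only if the call succeeds.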
unsigned long local_t;
unsigned long res = REAL(time)(&local_t);
if (t && res != (unsigned long)-1) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, t, sizeof(*t));
*t = local_t;
}
return res;
}
#define INIT_TIME COMMON_INTERCEPT_FUNCTION(time);
#else
#define INIT_TIME
#endif // SANITIZER_INTERCEPT_TIME
#if SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS
static void unpoison_tm(void *ctx, __sanitizer_tm *tm) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tm, sizeof(*tm));
#if !SANITIZER_SOLARIS
if (tm->tm_zone) {
    // Cannot use COMMON_INTERCEPTOR_WRITE_RANGE here, because tm->tm_zone
// can point to shared memory and tsan would report a data race.
COMMON_INTERCEPTOR_INITIALIZE_RANGE(tm->tm_zone,
internal_strlen(tm->tm_zone) + 1);
}
#endif
}
INTERCEPTOR(__sanitizer_tm *, localtime, unsigned long *timep) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, localtime, timep);
__sanitizer_tm *res = REAL(localtime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(__sanitizer_tm *, localtime_r, unsigned long *timep, void *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, localtime_r, timep, result);
__sanitizer_tm *res = REAL(localtime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(__sanitizer_tm *, gmtime, unsigned long *timep) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gmtime, timep);
__sanitizer_tm *res = REAL(gmtime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(__sanitizer_tm *, gmtime_r, unsigned long *timep, void *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gmtime_r, timep, result);
__sanitizer_tm *res = REAL(gmtime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
unpoison_tm(ctx, res);
}
return res;
}
INTERCEPTOR(char *, ctime, unsigned long *timep) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctime, timep);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ctime)(timep);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(char *, ctime_r, unsigned long *timep, char *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctime_r, timep, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ctime_r)(timep, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, timep, sizeof(*timep));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(char *, asctime, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asctime, tm);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(asctime)(tm);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(char *, asctime_r, __sanitizer_tm *tm, char *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asctime_r, tm, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(asctime_r)(tm, result);
if (res) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, tm, sizeof(*tm));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
}
return res;
}
INTERCEPTOR(long, mktime, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mktime, tm);
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_sec, sizeof(tm->tm_sec));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_min, sizeof(tm->tm_min));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_hour, sizeof(tm->tm_hour));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_mday, sizeof(tm->tm_mday));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_mon, sizeof(tm->tm_mon));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_year, sizeof(tm->tm_year));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &tm->tm_isdst, sizeof(tm->tm_isdst));
long res = REAL(mktime)(tm);
if (res != -1) unpoison_tm(ctx, tm);
return res;
}
#define INIT_LOCALTIME_AND_FRIENDS \
COMMON_INTERCEPT_FUNCTION(localtime); \
COMMON_INTERCEPT_FUNCTION(localtime_r); \
COMMON_INTERCEPT_FUNCTION(gmtime); \
COMMON_INTERCEPT_FUNCTION(gmtime_r); \
COMMON_INTERCEPT_FUNCTION(ctime); \
COMMON_INTERCEPT_FUNCTION(ctime_r); \
COMMON_INTERCEPT_FUNCTION(asctime); \
COMMON_INTERCEPT_FUNCTION(asctime_r); \
COMMON_INTERCEPT_FUNCTION(mktime);
#else
#define INIT_LOCALTIME_AND_FRIENDS
#endif // SANITIZER_INTERCEPT_LOCALTIME_AND_FRIENDS
#if SANITIZER_INTERCEPT_STRPTIME
INTERCEPTOR(char *, strptime, char *s, char *format, __sanitizer_tm *tm) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strptime, s, format, tm);
if (format)
COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(strptime)(s, format, tm);
COMMON_INTERCEPTOR_READ_STRING(ctx, s, res ? res - s : 0);
if (res && tm) {
    // Do not call unpoison_tm here, because strptime does not, in fact,
    // initialize the entire struct tm. For example, the tm_zone pointer is
    // left uninitialized.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tm, sizeof(*tm));
}
return res;
}
#define INIT_STRPTIME COMMON_INTERCEPT_FUNCTION(strptime);
#else
#define INIT_STRPTIME
#endif
#if SANITIZER_INTERCEPT_SCANF || SANITIZER_INTERCEPT_PRINTF
#include "sanitizer_common_interceptors_format.inc"
#define FORMAT_INTERCEPTOR_IMPL(name, vname, ...) \
{ \
void *ctx; \
va_list ap; \
va_start(ap, format); \
COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__, ap); \
int res = WRAP(vname)(__VA_ARGS__, ap); \
va_end(ap); \
return res; \
}
#endif
#if SANITIZER_INTERCEPT_SCANF
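// Body of the v*scanf interceptors: call the real function first and, on
// success, let scanf_common() mark the output arguments described by |format|
// (via the copied va_list) as initialized.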
#define VSCANF_INTERCEPTOR_IMPL(vname, allowGnuMalloc, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__); \
va_list aq; \
va_copy(aq, ap); \
int res = REAL(vname)(__VA_ARGS__); \
if (res > 0) \
scanf_common(ctx, res, allowGnuMalloc, format, aq); \
va_end(aq); \
return res; \
}
INTERCEPTOR(int, vscanf, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(vscanf, true, format, ap)
INTERCEPTOR(int, vsscanf, const char *str, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(vsscanf, true, str, format, ap)
INTERCEPTOR(int, vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(vfscanf, true, stream, format, ap)
#if SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, __isoc99_vscanf, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vscanf, false, format, ap)
INTERCEPTOR(int, __isoc99_vsscanf, const char *str, const char *format,
va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap)
INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap)
VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap)
#endif // SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, scanf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(scanf, vscanf, format)
INTERCEPTOR(int, fscanf, void *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(fscanf, vfscanf, stream, format)
INTERCEPTOR(int, sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(sscanf, vsscanf, str, format)
#if SANITIZER_INTERCEPT_ISOC99_SCANF
INTERCEPTOR(int, __isoc99_scanf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_scanf, __isoc99_vscanf, format)
INTERCEPTOR(int, __isoc99_fscanf, void *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format)
INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format)
#endif
#endif
#if SANITIZER_INTERCEPT_SCANF
#define INIT_SCANF \
COMMON_INTERCEPT_FUNCTION_LDBL(scanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(sscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(fscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vsscanf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vfscanf);
#else
#define INIT_SCANF
#endif
#if SANITIZER_INTERCEPT_ISOC99_SCANF
#define INIT_ISOC99_SCANF \
COMMON_INTERCEPT_FUNCTION(__isoc99_scanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_sscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf);
#else
#define INIT_ISOC99_SCANF
#endif
#if SANITIZER_INTERCEPT_PRINTF
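// Bodies of the v*printf interceptors: optionally check the format string and
// its arguments via printf_common(), call the real function, and for the
// string-producing variants mark the output buffer as written.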
#define VPRINTF_INTERCEPTOR_ENTER(vname, ...) \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, vname, __VA_ARGS__); \
va_list aq; \
va_copy(aq, ap);
#define VPRINTF_INTERCEPTOR_RETURN() \
va_end(aq);
#define VPRINTF_INTERCEPTOR_IMPL(vname, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, __VA_ARGS__); \
if (common_flags()->check_printf) \
printf_common(ctx, format, aq); \
int res = REAL(vname)(__VA_ARGS__); \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
// FIXME: under ASan the REAL() call below may write to freed memory and
// corrupt its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define VSPRINTF_INTERCEPTOR_IMPL(vname, str, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, str, __VA_ARGS__) \
if (common_flags()->check_printf) { \
printf_common(ctx, format, aq); \
} \
int res = REAL(vname)(str, __VA_ARGS__); \
if (res >= 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, str, res + 1); \
} \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
// FIXME: under ASan the REAL() call below may write to freed memory and
// corrupt its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define VSNPRINTF_INTERCEPTOR_IMPL(vname, str, size, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, str, size, __VA_ARGS__) \
if (common_flags()->check_printf) { \
printf_common(ctx, format, aq); \
} \
int res = REAL(vname)(str, size, __VA_ARGS__); \
if (res >= 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, str, Min(size, (SIZE_T)(res + 1))); \
} \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
// FIXME: under ASan the REAL() call below may write to freed memory and
// corrupt its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define VASPRINTF_INTERCEPTOR_IMPL(vname, strp, ...) \
{ \
VPRINTF_INTERCEPTOR_ENTER(vname, strp, __VA_ARGS__) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, strp, sizeof(char *)); \
if (common_flags()->check_printf) { \
printf_common(ctx, format, aq); \
} \
int res = REAL(vname)(strp, __VA_ARGS__); \
if (res >= 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *strp, res + 1); \
} \
VPRINTF_INTERCEPTOR_RETURN(); \
return res; \
}
INTERCEPTOR(int, vprintf, const char *format, va_list ap)
VPRINTF_INTERCEPTOR_IMPL(vprintf, format, ap)
INTERCEPTOR(int, vfprintf, __sanitizer_FILE *stream, const char *format,
va_list ap)
VPRINTF_INTERCEPTOR_IMPL(vfprintf, stream, format, ap)
INTERCEPTOR(int, vsnprintf, char *str, SIZE_T size, const char *format,
va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(vsnprintf, str, size, format, ap)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __vsnprintf_chk, char *str, SIZE_T size, int flag,
SIZE_T size_to, const char *format, va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(vsnprintf, str, size, format, ap)
#endif
#if SANITIZER_INTERCEPT_PRINTF_L
INTERCEPTOR(int, vsnprintf_l, char *str, SIZE_T size, void *loc,
const char *format, va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(vsnprintf_l, str, size, loc, format, ap)
INTERCEPTOR(int, snprintf_l, char *str, SIZE_T size, void *loc,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(snprintf_l, vsnprintf_l, str, size, loc, format)
#endif // SANITIZER_INTERCEPT_PRINTF_L
INTERCEPTOR(int, vsprintf, char *str, const char *format, va_list ap)
VSPRINTF_INTERCEPTOR_IMPL(vsprintf, str, format, ap)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __vsprintf_chk, char *str, int flag, SIZE_T size_to,
const char *format, va_list ap)
VSPRINTF_INTERCEPTOR_IMPL(vsprintf, str, format, ap)
#endif
INTERCEPTOR(int, vasprintf, char **strp, const char *format, va_list ap)
VASPRINTF_INTERCEPTOR_IMPL(vasprintf, strp, format, ap)
#if SANITIZER_INTERCEPT_ISOC99_PRINTF
INTERCEPTOR(int, __isoc99_vprintf, const char *format, va_list ap)
VPRINTF_INTERCEPTOR_IMPL(__isoc99_vprintf, format, ap)
INTERCEPTOR(int, __isoc99_vfprintf, __sanitizer_FILE *stream,
const char *format, va_list ap)
VPRINTF_INTERCEPTOR_IMPL(__isoc99_vfprintf, stream, format, ap)
INTERCEPTOR(int, __isoc99_vsnprintf, char *str, SIZE_T size, const char *format,
va_list ap)
VSNPRINTF_INTERCEPTOR_IMPL(__isoc99_vsnprintf, str, size, format, ap)
INTERCEPTOR(int, __isoc99_vsprintf, char *str, const char *format,
va_list ap)
VSPRINTF_INTERCEPTOR_IMPL(__isoc99_vsprintf, str, format,
ap)
#endif // SANITIZER_INTERCEPT_ISOC99_PRINTF
INTERCEPTOR(int, printf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(printf, vprintf, format)
INTERCEPTOR(int, fprintf, __sanitizer_FILE *stream, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(fprintf, vfprintf, stream, format)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __fprintf_chk, __sanitizer_FILE *stream, SIZE_T size,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__fprintf_chk, vfprintf, stream, format)
#endif
INTERCEPTOR(int, sprintf, char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(sprintf, vsprintf, str, format)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __sprintf_chk, char *str, int flag, SIZE_T size_to,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__sprintf_chk, vsprintf, str, format)
#endif
INTERCEPTOR(int, snprintf, char *str, SIZE_T size, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(snprintf, vsnprintf, str, size, format)
#if SANITIZER_INTERCEPT___PRINTF_CHK
INTERCEPTOR(int, __snprintf_chk, char *str, SIZE_T size, int flag,
SIZE_T size_to, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__snprintf_chk, vsnprintf, str, size, format)
#endif
INTERCEPTOR(int, asprintf, char **strp, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(asprintf, vasprintf, strp, format)
#if SANITIZER_INTERCEPT_ISOC99_PRINTF
INTERCEPTOR(int, __isoc99_printf, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_printf, __isoc99_vprintf, format)
INTERCEPTOR(int, __isoc99_fprintf, __sanitizer_FILE *stream, const char *format,
...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_fprintf, __isoc99_vfprintf, stream, format)
INTERCEPTOR(int, __isoc99_sprintf, char *str, const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_sprintf, __isoc99_vsprintf, str, format)
INTERCEPTOR(int, __isoc99_snprintf, char *str, SIZE_T size,
const char *format, ...)
FORMAT_INTERCEPTOR_IMPL(__isoc99_snprintf, __isoc99_vsnprintf, str, size,
format)
#endif // SANITIZER_INTERCEPT_ISOC99_PRINTF
#endif // SANITIZER_INTERCEPT_PRINTF
#if SANITIZER_INTERCEPT_PRINTF
#define INIT_PRINTF \
COMMON_INTERCEPT_FUNCTION_LDBL(printf); \
COMMON_INTERCEPT_FUNCTION_LDBL(sprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(snprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(asprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(fprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vsprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vsnprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vasprintf); \
COMMON_INTERCEPT_FUNCTION_LDBL(vfprintf);
#else
#define INIT_PRINTF
#endif
#if SANITIZER_INTERCEPT___PRINTF_CHK
#define INIT___PRINTF_CHK \
COMMON_INTERCEPT_FUNCTION(__sprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__snprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__vsprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__vsnprintf_chk); \
COMMON_INTERCEPT_FUNCTION(__fprintf_chk);
#else
#define INIT___PRINTF_CHK
#endif
#if SANITIZER_INTERCEPT_PRINTF_L
#define INIT_PRINTF_L \
COMMON_INTERCEPT_FUNCTION(snprintf_l); \
COMMON_INTERCEPT_FUNCTION(vsnprintf_l);
#else
#define INIT_PRINTF_L
#endif
#if SANITIZER_INTERCEPT_ISOC99_PRINTF
#define INIT_ISOC99_PRINTF \
COMMON_INTERCEPT_FUNCTION(__isoc99_printf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_sprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_snprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_fprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vsnprintf); \
COMMON_INTERCEPT_FUNCTION(__isoc99_vfprintf);
#else
#define INIT_ISOC99_PRINTF
#endif
#if SANITIZER_INTERCEPT_IOCTL
#include "sanitizer_common_interceptors_ioctl.inc"
#include "sanitizer_interceptors_ioctl_netbsd.inc"
INTERCEPTOR(int, ioctl, int d, unsigned long request, ...) {
// We need a frame pointer, because we call into ioctl_common_[pre|post] which
// can trigger a report and we need to be able to unwind through this
// function. On Mac in debug mode we might not have a frame pointer, because
// ioctl_common_[pre|post] doesn't get inlined here.
ENABLE_FRAME_POINTER;
void *ctx;
va_list ap;
va_start(ap, request);
void *arg = va_arg(ap, void *);
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, ioctl, d, request, arg);
CHECK(ioctl_initialized);
// Note: TSan does not use common flags, and they are zero-initialized.
// This effectively disables ioctl handling in TSan.
if (!common_flags()->handle_ioctl) return REAL(ioctl)(d, request, arg);
// Although request is unsigned long, the rest of the interceptor uses it
// as just "unsigned" to save space, because we know that all values fit in
// "unsigned" - they are compile-time constants.
const ioctl_desc *desc = ioctl_lookup(request);
ioctl_desc decoded_desc;
if (!desc) {
VPrintf(2, "Decoding unknown ioctl 0x%lx\n", request);
if (!ioctl_decode(request, &decoded_desc))
Printf("WARNING: failed decoding unknown ioctl 0x%lx\n", request);
else
desc = &decoded_desc;
}
if (desc) ioctl_common_pre(ctx, desc, d, request, arg);
int res = REAL(ioctl)(d, request, arg);
// FIXME: some ioctls have different return values for success and failure.
if (desc && res != -1) ioctl_common_post(ctx, desc, res, d, request, arg);
return res;
}
#define INIT_IOCTL \
ioctl_init(); \
COMMON_INTERCEPT_FUNCTION(ioctl);
#else
#define INIT_IOCTL
#endif
#if SANITIZER_POSIX
UNUSED static void unpoison_passwd(void *ctx, __sanitizer_passwd *pwd) {
if (pwd) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd, sizeof(*pwd));
if (pwd->pw_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_name,
internal_strlen(pwd->pw_name) + 1);
if (pwd->pw_passwd)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_passwd,
internal_strlen(pwd->pw_passwd) + 1);
#if !SANITIZER_ANDROID
if (pwd->pw_gecos)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_gecos,
internal_strlen(pwd->pw_gecos) + 1);
#endif
#if SANITIZER_APPLE || SANITIZER_FREEBSD || SANITIZER_NETBSD
if (pwd->pw_class)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_class,
internal_strlen(pwd->pw_class) + 1);
#endif
if (pwd->pw_dir)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_dir,
internal_strlen(pwd->pw_dir) + 1);
if (pwd->pw_shell)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwd->pw_shell,
internal_strlen(pwd->pw_shell) + 1);
}
}
UNUSED static void unpoison_group(void *ctx, __sanitizer_group *grp) {
if (grp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp, sizeof(*grp));
if (grp->gr_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_name,
internal_strlen(grp->gr_name) + 1);
if (grp->gr_passwd)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_passwd,
internal_strlen(grp->gr_passwd) + 1);
char **p = grp->gr_mem;
for (; *p; ++p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, grp->gr_mem,
(p - grp->gr_mem + 1) * sizeof(*p));
}
}
#endif // SANITIZER_POSIX
#if SANITIZER_INTERCEPT_GETPWNAM_AND_FRIENDS
INTERCEPTOR(__sanitizer_passwd *, getpwnam, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwnam, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_passwd *res = REAL(getpwnam)(name);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_passwd *, getpwuid, u32 uid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwuid, uid);
__sanitizer_passwd *res = REAL(getpwuid)(uid);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, getgrnam, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrnam, name);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_group *res = REAL(getgrnam)(name);
unpoison_group(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, getgrgid, u32 gid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrgid, gid);
__sanitizer_group *res = REAL(getgrgid)(gid);
unpoison_group(ctx, res);
return res;
}
#define INIT_GETPWNAM_AND_FRIENDS \
COMMON_INTERCEPT_FUNCTION(getpwnam); \
COMMON_INTERCEPT_FUNCTION(getpwuid); \
COMMON_INTERCEPT_FUNCTION(getgrnam); \
COMMON_INTERCEPT_FUNCTION(getgrgid);
#else
#define INIT_GETPWNAM_AND_FRIENDS
#endif
#if SANITIZER_INTERCEPT_GETPWNAM_R_AND_FRIENDS
INTERCEPTOR(int, getpwnam_r, const char *name, __sanitizer_passwd *pwd,
char *buf, SIZE_T buflen, __sanitizer_passwd **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwnam_r, name, pwd, buf, buflen, result);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpwnam_r)(name, pwd, buf, buflen, result);
if (!res && result)
unpoison_passwd(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, getpwuid_r, u32 uid, __sanitizer_passwd *pwd, char *buf,
SIZE_T buflen, __sanitizer_passwd **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwuid_r, uid, pwd, buf, buflen, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpwuid_r)(uid, pwd, buf, buflen, result);
if (!res && result)
unpoison_passwd(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, getgrnam_r, const char *name, __sanitizer_group *grp,
char *buf, SIZE_T buflen, __sanitizer_group **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrnam_r, name, grp, buf, buflen, result);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgrnam_r)(name, grp, buf, buflen, result);
if (!res && result)
unpoison_group(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, getgrgid_r, u32 gid, __sanitizer_group *grp, char *buf,
SIZE_T buflen, __sanitizer_group **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrgid_r, gid, grp, buf, buflen, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgrgid_r)(gid, grp, buf, buflen, result);
if (!res && result)
unpoison_group(ctx, *result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
#define INIT_GETPWNAM_R_AND_FRIENDS \
COMMON_INTERCEPT_FUNCTION(getpwnam_r); \
COMMON_INTERCEPT_FUNCTION(getpwuid_r); \
COMMON_INTERCEPT_FUNCTION(getgrnam_r); \
COMMON_INTERCEPT_FUNCTION(getgrgid_r);
#else
#define INIT_GETPWNAM_R_AND_FRIENDS
#endif
#if SANITIZER_INTERCEPT_GETPWENT
INTERCEPTOR(__sanitizer_passwd *, getpwent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwent, dummy);
__sanitizer_passwd *res = REAL(getpwent)(dummy);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, getgrent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrent, dummy);
__sanitizer_group *res = REAL(getgrent)(dummy);
unpoison_group(ctx, res);
return res;
}
#define INIT_GETPWENT \
COMMON_INTERCEPT_FUNCTION(getpwent); \
COMMON_INTERCEPT_FUNCTION(getgrent);
#else
#define INIT_GETPWENT
#endif
#if SANITIZER_INTERCEPT_FGETPWENT
INTERCEPTOR(__sanitizer_passwd *, fgetpwent, void *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetpwent, fp);
__sanitizer_passwd *res = REAL(fgetpwent)(fp);
unpoison_passwd(ctx, res);
return res;
}
INTERCEPTOR(__sanitizer_group *, fgetgrent, void *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetgrent, fp);
__sanitizer_group *res = REAL(fgetgrent)(fp);
unpoison_group(ctx, res);
return res;
}
#define INIT_FGETPWENT \
COMMON_INTERCEPT_FUNCTION(fgetpwent); \
COMMON_INTERCEPT_FUNCTION(fgetgrent);
#else
#define INIT_FGETPWENT
#endif
#if SANITIZER_INTERCEPT_GETPWENT_R
INTERCEPTOR(int, getpwent_r, __sanitizer_passwd *pwbuf, char *buf,
SIZE_T buflen, __sanitizer_passwd **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpwent_r, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpwent_r)(pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_passwd(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
INTERCEPTOR(int, getgrent_r, __sanitizer_group *pwbuf, char *buf, SIZE_T buflen,
__sanitizer_group **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgrent_r, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgrent_r)(pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_group(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
#define INIT_GETPWENT_R \
COMMON_INTERCEPT_FUNCTION(getpwent_r); \
COMMON_INTERCEPT_FUNCTION(getgrent_r);
#else
#define INIT_GETPWENT_R
#endif
#if SANITIZER_INTERCEPT_FGETPWENT_R
INTERCEPTOR(int, fgetpwent_r, void *fp, __sanitizer_passwd *pwbuf, char *buf,
SIZE_T buflen, __sanitizer_passwd **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetpwent_r, fp, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fgetpwent_r)(fp, pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_passwd(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
#define INIT_FGETPWENT_R \
COMMON_INTERCEPT_FUNCTION(fgetpwent_r);
#else
#define INIT_FGETPWENT_R
#endif
#if SANITIZER_INTERCEPT_FGETGRENT_R
INTERCEPTOR(int, fgetgrent_r, void *fp, __sanitizer_group *pwbuf, char *buf,
SIZE_T buflen, __sanitizer_group **pwbufp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetgrent_r, fp, pwbuf, buf, buflen, pwbufp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fgetgrent_r)(fp, pwbuf, buf, buflen, pwbufp);
if (!res && pwbufp)
unpoison_group(ctx, *pwbufp);
if (pwbufp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pwbufp, sizeof(*pwbufp));
return res;
}
#define INIT_FGETGRENT_R \
COMMON_INTERCEPT_FUNCTION(fgetgrent_r);
#else
#define INIT_FGETGRENT_R
#endif
#if SANITIZER_INTERCEPT_SETPWENT
// The only thing these interceptors do is disable any nested interceptors.
// These functions may open NSS modules and call uninstrumented functions from
// them, and we don't want interceptors like strlen() to fire on those calls.
INTERCEPTOR(void, setpwent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setpwent, dummy);
REAL(setpwent)(dummy);
}
INTERCEPTOR(void, endpwent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, endpwent, dummy);
REAL(endpwent)(dummy);
}
INTERCEPTOR(void, setgrent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setgrent, dummy);
REAL(setgrent)(dummy);
}
INTERCEPTOR(void, endgrent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, endgrent, dummy);
REAL(endgrent)(dummy);
}
#define INIT_SETPWENT \
COMMON_INTERCEPT_FUNCTION(setpwent); \
COMMON_INTERCEPT_FUNCTION(endpwent); \
COMMON_INTERCEPT_FUNCTION(setgrent); \
COMMON_INTERCEPT_FUNCTION(endgrent);
#else
#define INIT_SETPWENT
#endif
#if SANITIZER_INTERCEPT_CLOCK_GETTIME
INTERCEPTOR(int, clock_getres, u32 clk_id, void *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_getres, clk_id, tp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(clock_getres)(clk_id, tp);
if (!res && tp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, struct_timespec_sz);
}
return res;
}
INTERCEPTOR(int, clock_gettime, u32 clk_id, void *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_gettime, clk_id, tp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(clock_gettime)(clk_id, tp);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, struct_timespec_sz);
}
return res;
}
#if SANITIZER_GLIBC
namespace __sanitizer {
extern "C" {
int real_clock_gettime(u32 clk_id, void *tp) {
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_clock_gettime(clk_id, tp);
return REAL(clock_gettime)(clk_id, tp);
}
} // extern "C"
} // namespace __sanitizer
#endif
INTERCEPTOR(int, clock_settime, u32 clk_id, const void *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_settime, clk_id, tp);
COMMON_INTERCEPTOR_READ_RANGE(ctx, tp, struct_timespec_sz);
return REAL(clock_settime)(clk_id, tp);
}
#define INIT_CLOCK_GETTIME \
COMMON_INTERCEPT_FUNCTION(clock_getres); \
COMMON_INTERCEPT_FUNCTION(clock_gettime); \
COMMON_INTERCEPT_FUNCTION(clock_settime);
#else
#define INIT_CLOCK_GETTIME
#endif
#if SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID
INTERCEPTOR(int, clock_getcpuclockid, pid_t pid,
__sanitizer_clockid_t *clockid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, clock_getcpuclockid, pid, clockid);
int res = REAL(clock_getcpuclockid)(pid, clockid);
if (!res && clockid) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid);
}
return res;
}
INTERCEPTOR(int, pthread_getcpuclockid, uptr thread,
__sanitizer_clockid_t *clockid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getcpuclockid, thread, clockid);
int res = REAL(pthread_getcpuclockid)(thread, clockid);
if (!res && clockid) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid);
}
return res;
}
#define INIT_CLOCK_GETCPUCLOCKID \
COMMON_INTERCEPT_FUNCTION(clock_getcpuclockid); \
COMMON_INTERCEPT_FUNCTION(pthread_getcpuclockid);
#else
#define INIT_CLOCK_GETCPUCLOCKID
#endif
#if SANITIZER_INTERCEPT_GETITIMER
INTERCEPTOR(int, getitimer, int which, void *curr_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getitimer, which, curr_value);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getitimer)(which, curr_value);
if (!res && curr_value) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, curr_value, struct_itimerval_sz);
}
return res;
}
INTERCEPTOR(int, setitimer, int which, const void *new_value, void *old_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setitimer, which, new_value, old_value);
if (new_value) {
// itimerval can contain padding that may be legitimately uninitialized
const struct __sanitizer_itimerval *nv =
(const struct __sanitizer_itimerval *)new_value;
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_interval.tv_sec,
sizeof(__sanitizer_time_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_interval.tv_usec,
sizeof(__sanitizer_suseconds_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_value.tv_sec,
sizeof(__sanitizer_time_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &nv->it_value.tv_usec,
sizeof(__sanitizer_suseconds_t));
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(setitimer)(which, new_value, old_value);
if (!res && old_value) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, old_value, struct_itimerval_sz);
}
return res;
}
#define INIT_GETITIMER \
COMMON_INTERCEPT_FUNCTION(getitimer); \
COMMON_INTERCEPT_FUNCTION(setitimer);
#else
#define INIT_GETITIMER
#endif
#if SANITIZER_INTERCEPT_GLOB
static void unpoison_glob_t(void *ctx, __sanitizer_glob_t *pglob) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pglob, sizeof(*pglob));
// +1 for NULL pointer at the end.
if (pglob->gl_pathv)
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, pglob->gl_pathv, (pglob->gl_pathc + 1) * sizeof(*pglob->gl_pathv));
for (SIZE_T i = 0; i < pglob->gl_pathc; ++i) {
char *p = pglob->gl_pathv[i];
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, internal_strlen(p) + 1);
}
}
#if SANITIZER_SOLARIS
INTERCEPTOR(int, glob, const char *pattern, int flags,
int (*errfunc)(const char *epath, int eerrno),
__sanitizer_glob_t *pglob) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, glob, pattern, flags, errfunc, pglob);
COMMON_INTERCEPTOR_READ_STRING(ctx, pattern, 0);
int res = REAL(glob)(pattern, flags, errfunc, pglob);
if ((!res || res == glob_nomatch) && pglob) unpoison_glob_t(ctx, pglob);
return res;
}
#else
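// When the glob_altdirfunc flag (GLOB_ALTDIRFUNC) is set, the user-supplied
// gl_* callbacks are invoked from uninstrumented libc code, so the wrappers
// below unpoison the callback arguments before forwarding to the original
// callbacks saved in pglob_copy.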
static THREADLOCAL __sanitizer_glob_t *pglob_copy;
static void wrapped_gl_closedir(void *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
pglob_copy->gl_closedir(dir);
}
static void *wrapped_gl_readdir(void *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
return pglob_copy->gl_readdir(dir);
}
static void *wrapped_gl_opendir(const char *s) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_opendir(s);
}
static int wrapped_gl_lstat(const char *s, void *st) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_lstat(s, st);
}
static int wrapped_gl_stat(const char *s, void *st) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, internal_strlen(s) + 1);
return pglob_copy->gl_stat(s, st);
}
static const __sanitizer_glob_t kGlobCopy = {
0, 0, 0,
0, wrapped_gl_closedir, wrapped_gl_readdir,
wrapped_gl_opendir, wrapped_gl_lstat, wrapped_gl_stat};
INTERCEPTOR(int, glob, const char *pattern, int flags,
int (*errfunc)(const char *epath, int eerrno),
__sanitizer_glob_t *pglob) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, glob, pattern, flags, errfunc, pglob);
COMMON_INTERCEPTOR_READ_STRING(ctx, pattern, 0);
__sanitizer_glob_t glob_copy;
internal_memcpy(&glob_copy, &kGlobCopy, sizeof(glob_copy));
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
pglob_copy = &glob_copy;
}
int res = REAL(glob)(pattern, flags, errfunc, pglob);
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
}
pglob_copy = 0;
if ((!res || res == glob_nomatch) && pglob) unpoison_glob_t(ctx, pglob);
return res;
}
#endif // SANITIZER_SOLARIS
#define INIT_GLOB \
COMMON_INTERCEPT_FUNCTION(glob);
#else // SANITIZER_INTERCEPT_GLOB
#define INIT_GLOB
#endif // SANITIZER_INTERCEPT_GLOB
#if SANITIZER_INTERCEPT_GLOB64
INTERCEPTOR(int, glob64, const char *pattern, int flags,
int (*errfunc)(const char *epath, int eerrno),
__sanitizer_glob_t *pglob) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, glob64, pattern, flags, errfunc, pglob);
COMMON_INTERCEPTOR_READ_STRING(ctx, pattern, 0);
__sanitizer_glob_t glob_copy;
internal_memcpy(&glob_copy, &kGlobCopy, sizeof(glob_copy));
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
pglob_copy = &glob_copy;
}
int res = REAL(glob64)(pattern, flags, errfunc, pglob);
if (flags & glob_altdirfunc) {
Swap(pglob->gl_closedir, glob_copy.gl_closedir);
Swap(pglob->gl_readdir, glob_copy.gl_readdir);
Swap(pglob->gl_opendir, glob_copy.gl_opendir);
Swap(pglob->gl_lstat, glob_copy.gl_lstat);
Swap(pglob->gl_stat, glob_copy.gl_stat);
}
pglob_copy = 0;
if ((!res || res == glob_nomatch) && pglob) unpoison_glob_t(ctx, pglob);
return res;
}
#define INIT_GLOB64 \
COMMON_INTERCEPT_FUNCTION(glob64);
#else // SANITIZER_INTERCEPT_GLOB64
#define INIT_GLOB64
#endif // SANITIZER_INTERCEPT_GLOB64
#if SANITIZER_INTERCEPT___B64_TO
INTERCEPTOR(int, __b64_ntop, unsigned char const *src, SIZE_T srclength,
char *target, SIZE_T targsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __b64_ntop, src, srclength, target, targsize);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, srclength);
int res = REAL(__b64_ntop)(src, srclength, target, targsize);
if (res >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, target, res + 1);
return res;
}
INTERCEPTOR(int, __b64_pton, char const *src, char *target, SIZE_T targsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __b64_pton, src, target, targsize);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int res = REAL(__b64_pton)(src, target, targsize);
if (res >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, target, res);
return res;
}
#define INIT___B64_TO \
COMMON_INTERCEPT_FUNCTION(__b64_ntop); \
COMMON_INTERCEPT_FUNCTION(__b64_pton);
#else // SANITIZER_INTERCEPT___B64_TO
#define INIT___B64_TO
#endif // SANITIZER_INTERCEPT___B64_TO
#if SANITIZER_INTERCEPT___DN_EXPAND
# if __GLIBC_PREREQ(2, 34)
// Changed with https://sourceware.org/git/?p=glibc.git;h=640bbdf
# define DN_EXPAND_INTERCEPTOR_NAME dn_expand
# else
# define DN_EXPAND_INTERCEPTOR_NAME __dn_expand
# endif
INTERCEPTOR(int, DN_EXPAND_INTERCEPTOR_NAME, unsigned char const *base,
unsigned char const *end, unsigned char const *src, char *dest,
int space) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, DN_EXPAND_INTERCEPTOR_NAME, base, end, src,
dest, space);
// TODO: add a read check if a __dn_comp interceptor is added.
int res = REAL(DN_EXPAND_INTERCEPTOR_NAME)(base, end, src, dest, space);
if (res >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, internal_strlen(dest) + 1);
return res;
}
# define INIT___DN_EXPAND \
COMMON_INTERCEPT_FUNCTION(DN_EXPAND_INTERCEPTOR_NAME);
#else // SANITIZER_INTERCEPT___DN_EXPAND
# define INIT___DN_EXPAND
#endif // SANITIZER_INTERCEPT___DN_EXPAND
#if SANITIZER_INTERCEPT_POSIX_SPAWN
template <class RealSpawnPtr>
static int PosixSpawnImpl(void *ctx, RealSpawnPtr *real_posix_spawn, pid_t *pid,
const char *file_or_path, const void *file_actions,
const void *attrp, char *const argv[],
char *const envp[]) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, file_or_path,
internal_strlen(file_or_path) + 1);
if (argv) {
for (char *const *s = argv; ; ++s) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
if (!*s) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
}
}
if (envp) {
for (char *const *s = envp; ; ++s) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(*s));
if (!*s) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *s, internal_strlen(*s) + 1);
}
}
int res =
real_posix_spawn(pid, file_or_path, file_actions, attrp, argv, envp);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pid, sizeof(*pid));
return res;
}
INTERCEPTOR(int, posix_spawn, pid_t *pid, const char *path,
const void *file_actions, const void *attrp, char *const argv[],
char *const envp[]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, posix_spawn, pid, path, file_actions, attrp,
argv, envp);
return PosixSpawnImpl(ctx, REAL(posix_spawn), pid, path, file_actions, attrp,
argv, envp);
}
INTERCEPTOR(int, posix_spawnp, pid_t *pid, const char *file,
const void *file_actions, const void *attrp, char *const argv[],
char *const envp[]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, posix_spawnp, pid, file, file_actions, attrp,
argv, envp);
return PosixSpawnImpl(ctx, REAL(posix_spawnp), pid, file, file_actions, attrp,
argv, envp);
}
# define INIT_POSIX_SPAWN \
COMMON_INTERCEPT_FUNCTION(posix_spawn); \
COMMON_INTERCEPT_FUNCTION(posix_spawnp);
#else // SANITIZER_INTERCEPT_POSIX_SPAWN
# define INIT_POSIX_SPAWN
#endif // SANITIZER_INTERCEPT_POSIX_SPAWN
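// The argv/envp loops in PosixSpawnImpl above walk NULL-terminated pointer
// vectors: each pointer cell is checked as a read (including the final NULL
// cell that terminates the loop), and each non-NULL cell's C string is
// checked up to and including its NUL. pid is marked written only on
// success, matching posix_spawn's contract of returning 0 rather than
// setting errno.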
#if SANITIZER_INTERCEPT_WAIT
// According to sys/wait.h, wait(), waitid(), waitpid() may have symbol version
// suffixes on Darwin. See the declaration of INTERCEPTOR_WITH_SUFFIX for
// details.
INTERCEPTOR_WITH_SUFFIX(int, wait, int *status) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wait, status);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wait)(status);
if (res != -1 && status)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
return res;
}
// On FreeBSD id_t is always 64-bit wide.
#if SANITIZER_FREEBSD && (SANITIZER_WORDSIZE == 32)
INTERCEPTOR_WITH_SUFFIX(int, waitid, int idtype, long long id, void *infop,
int options) {
#else
INTERCEPTOR_WITH_SUFFIX(int, waitid, int idtype, int id, void *infop,
int options) {
#endif
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, waitid, idtype, id, infop, options);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(waitid)(idtype, id, infop, options);
if (res != -1 && infop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, infop, siginfo_t_sz);
return res;
}
INTERCEPTOR_WITH_SUFFIX(int, waitpid, int pid, int *status, int options) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, waitpid, pid, status, options);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(waitpid)(pid, status, options);
if (res != -1 && status)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
return res;
}
INTERCEPTOR(int, wait3, int *status, int options, void *rusage) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wait3, status, options, rusage);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wait3)(status, options, rusage);
if (res != -1) {
if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
}
return res;
}
#if SANITIZER_ANDROID
INTERCEPTOR(int, __wait4, int pid, int *status, int options, void *rusage) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __wait4, pid, status, options, rusage);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(__wait4)(pid, status, options, rusage);
if (res != -1) {
if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
}
return res;
}
#define INIT_WAIT4 COMMON_INTERCEPT_FUNCTION(__wait4);
#else
INTERCEPTOR(int, wait4, int pid, int *status, int options, void *rusage) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wait4, pid, status, options, rusage);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wait4)(pid, status, options, rusage);
if (res != -1) {
if (status) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, status, sizeof(*status));
if (rusage) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rusage, struct_rusage_sz);
}
return res;
}
#define INIT_WAIT4 COMMON_INTERCEPT_FUNCTION(wait4);
#endif // SANITIZER_ANDROID
#define INIT_WAIT \
COMMON_INTERCEPT_FUNCTION(wait); \
COMMON_INTERCEPT_FUNCTION(waitid); \
COMMON_INTERCEPT_FUNCTION(waitpid); \
COMMON_INTERCEPT_FUNCTION(wait3);
#else
#define INIT_WAIT
#define INIT_WAIT4
#endif
#if SANITIZER_INTERCEPT_INET
INTERCEPTOR(char *, inet_ntop, int af, const void *src, char *dst, u32 size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_ntop, af, src, dst, size);
uptr sz = __sanitizer_in_addr_sz(af);
if (sz) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sz);
// FIXME: figure out read size based on the address family.
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(inet_ntop)(af, src, dst, size);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(int, inet_pton, int af, const char *src, void *dst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_pton, af, src, dst);
COMMON_INTERCEPTOR_READ_STRING(ctx, src, 0);
// FIXME: figure out read size based on the address family.
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(inet_pton)(af, src, dst);
if (res == 1) {
uptr sz = __sanitizer_in_addr_sz(af);
if (sz) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sz);
}
return res;
}
#define INIT_INET \
COMMON_INTERCEPT_FUNCTION(inet_ntop); \
COMMON_INTERCEPT_FUNCTION(inet_pton);
#else
#define INIT_INET
#endif
#if SANITIZER_INTERCEPT_INET
INTERCEPTOR(int, inet_aton, const char *cp, void *dst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, inet_aton, cp, dst);
if (cp) COMMON_INTERCEPTOR_READ_RANGE(ctx, cp, internal_strlen(cp) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(inet_aton)(cp, dst);
if (res != 0) {
uptr sz = __sanitizer_in_addr_sz(af_inet);
if (sz) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sz);
}
return res;
}
#define INIT_INET_ATON COMMON_INTERCEPT_FUNCTION(inet_aton);
#else
#define INIT_INET_ATON
#endif
#if SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM
INTERCEPTOR(int, pthread_getschedparam, uptr thread, int *policy, int *param) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getschedparam, thread, policy, param);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_getschedparam)(thread, policy, param);
if (res == 0) {
if (policy) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, policy, sizeof(*policy));
if (param) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, param, sizeof(*param));
}
return res;
}
#define INIT_PTHREAD_GETSCHEDPARAM \
COMMON_INTERCEPT_FUNCTION(pthread_getschedparam);
#else
#define INIT_PTHREAD_GETSCHEDPARAM
#endif
#if SANITIZER_INTERCEPT_GETADDRINFO
INTERCEPTOR(int, getaddrinfo, char *node, char *service,
struct __sanitizer_addrinfo *hints,
struct __sanitizer_addrinfo **out) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getaddrinfo, node, service, hints, out);
if (node) COMMON_INTERCEPTOR_READ_RANGE(ctx, node, internal_strlen(node) + 1);
if (service)
COMMON_INTERCEPTOR_READ_RANGE(ctx, service, internal_strlen(service) + 1);
if (hints)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hints, sizeof(__sanitizer_addrinfo));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getaddrinfo)(node, service, hints, out);
if (res == 0 && out) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, out, sizeof(*out));
struct __sanitizer_addrinfo *p = *out;
while (p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
if (p->ai_addr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ai_addr, p->ai_addrlen);
if (p->ai_canonname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ai_canonname,
internal_strlen(p->ai_canonname) + 1);
p = p->ai_next;
}
}
return res;
}
#define INIT_GETADDRINFO COMMON_INTERCEPT_FUNCTION(getaddrinfo);
#else
#define INIT_GETADDRINFO
#endif
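// On success, getaddrinfo() above walks the libc-allocated result list via
// ai_next and unpoisons each node, its variable-sized ai_addr (ai_addrlen
// bytes), and the optional ai_canonname string; none of these writes would
// otherwise be visible to the sanitizer.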
#if SANITIZER_INTERCEPT_GETNAMEINFO
INTERCEPTOR(int, getnameinfo, void *sockaddr, unsigned salen, char *host,
unsigned hostlen, char *serv, unsigned servlen, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnameinfo, sockaddr, salen, host, hostlen,
serv, servlen, flags);
// FIXME: consider adding READ_RANGE(sockaddr, salen); the padding in
// in_addr may make this too noisy.
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res =
REAL(getnameinfo)(sockaddr, salen, host, hostlen, serv, servlen, flags);
if (res == 0) {
if (host && hostlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, host, internal_strlen(host) + 1);
if (serv && servlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, serv, internal_strlen(serv) + 1);
}
return res;
}
#define INIT_GETNAMEINFO COMMON_INTERCEPT_FUNCTION(getnameinfo);
#else
#define INIT_GETNAMEINFO
#endif
#if SANITIZER_INTERCEPT_GETSOCKNAME
INTERCEPTOR(int, getsockname, int sock_fd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getsockname, sock_fd, addr, addrlen);
unsigned addr_sz = 0;  // Set only when addrlen is non-null below.
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addr_sz = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getsockname)(sock_fd, addr, addrlen);
if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
#define INIT_GETSOCKNAME COMMON_INTERCEPT_FUNCTION(getsockname);
#else
#define INIT_GETSOCKNAME
#endif
#if SANITIZER_INTERCEPT_GETHOSTBYNAME || SANITIZER_INTERCEPT_GETHOSTBYNAME_R
static void write_hostent(void *ctx, struct __sanitizer_hostent *h) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h, sizeof(__sanitizer_hostent));
if (h->h_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h->h_name, internal_strlen(h->h_name) + 1);
char **p = h->h_aliases;
while (*p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
++p;
}
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, h->h_aliases, (p - h->h_aliases + 1) * sizeof(*h->h_aliases));
p = h->h_addr_list;
while (*p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, h->h_length);
++p;
}
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, h->h_addr_list, (p - h->h_addr_list + 1) * sizeof(*h->h_addr_list));
}
#endif
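// write_hostent() above unpoisons a libc-owned hostent: h_aliases and
// h_addr_list are NULL-terminated pointer arrays, so the ranges span
// (p - base + 1) elements to cover the terminating NULL slot as well. Each
// alias is a C string, while each address entry is exactly h_length raw
// bytes (4 for IPv4, 16 for IPv6).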
#if SANITIZER_INTERCEPT_GETHOSTBYNAME
INTERCEPTOR(struct __sanitizer_hostent *, gethostbyname, char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname, name);
struct __sanitizer_hostent *res = REAL(gethostbyname)(name);
if (res) write_hostent(ctx, res);
return res;
}
INTERCEPTOR(struct __sanitizer_hostent *, gethostbyaddr, void *addr, int len,
int type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyaddr, addr, len, type);
COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, len);
struct __sanitizer_hostent *res = REAL(gethostbyaddr)(addr, len, type);
if (res) write_hostent(ctx, res);
return res;
}
INTERCEPTOR(struct __sanitizer_hostent *, gethostent, int fake) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostent, fake);
struct __sanitizer_hostent *res = REAL(gethostent)(fake);
if (res) write_hostent(ctx, res);
return res;
}
#define INIT_GETHOSTBYNAME \
COMMON_INTERCEPT_FUNCTION(gethostent); \
COMMON_INTERCEPT_FUNCTION(gethostbyaddr); \
COMMON_INTERCEPT_FUNCTION(gethostbyname);
#else
#define INIT_GETHOSTBYNAME
#endif // SANITIZER_INTERCEPT_GETHOSTBYNAME
#if SANITIZER_INTERCEPT_GETHOSTBYNAME2
INTERCEPTOR(struct __sanitizer_hostent *, gethostbyname2, char *name, int af) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname2, name, af);
struct __sanitizer_hostent *res = REAL(gethostbyname2)(name, af);
if (res) write_hostent(ctx, res);
return res;
}
#define INIT_GETHOSTBYNAME2 COMMON_INTERCEPT_FUNCTION(gethostbyname2);
#else
#define INIT_GETHOSTBYNAME2
#endif // SANITIZER_INTERCEPT_GETHOSTBYNAME2
#if SANITIZER_INTERCEPT_GETHOSTBYNAME_R
INTERCEPTOR(int, gethostbyname_r, char *name, struct __sanitizer_hostent *ret,
char *buf, SIZE_T buflen, __sanitizer_hostent **result,
int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname_r, name, ret, buf, buflen, result,
h_errnop);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(gethostbyname_r)(name, ret, buf, buflen, result, h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTBYNAME_R COMMON_INTERCEPT_FUNCTION(gethostbyname_r);
#else
#define INIT_GETHOSTBYNAME_R
#endif
#if SANITIZER_INTERCEPT_GETHOSTENT_R
INTERCEPTOR(int, gethostent_r, struct __sanitizer_hostent *ret, char *buf,
SIZE_T buflen, __sanitizer_hostent **result, int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostent_r, ret, buf, buflen, result,
h_errnop);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(gethostent_r)(ret, buf, buflen, result, h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTENT_R \
COMMON_INTERCEPT_FUNCTION(gethostent_r);
#else
#define INIT_GETHOSTENT_R
#endif
#if SANITIZER_INTERCEPT_GETHOSTBYADDR_R
INTERCEPTOR(int, gethostbyaddr_r, void *addr, int len, int type,
struct __sanitizer_hostent *ret, char *buf, SIZE_T buflen,
__sanitizer_hostent **result, int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyaddr_r, addr, len, type, ret, buf,
buflen, result, h_errnop);
COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(gethostbyaddr_r)(addr, len, type, ret, buf, buflen, result,
h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTBYADDR_R \
COMMON_INTERCEPT_FUNCTION(gethostbyaddr_r);
#else
#define INIT_GETHOSTBYADDR_R
#endif
#if SANITIZER_INTERCEPT_GETHOSTBYNAME2_R
INTERCEPTOR(int, gethostbyname2_r, char *name, int af,
struct __sanitizer_hostent *ret, char *buf, SIZE_T buflen,
__sanitizer_hostent **result, int *h_errnop) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, gethostbyname2_r, name, af, ret, buf, buflen,
result, h_errnop);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res =
REAL(gethostbyname2_r)(name, af, ret, buf, buflen, result, h_errnop);
if (result) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (res == 0 && *result) write_hostent(ctx, *result);
}
if (h_errnop)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, h_errnop, sizeof(*h_errnop));
return res;
}
#define INIT_GETHOSTBYNAME2_R \
COMMON_INTERCEPT_FUNCTION(gethostbyname2_r);
#else
#define INIT_GETHOSTBYNAME2_R
#endif
#if SANITIZER_INTERCEPT_GETSOCKOPT
INTERCEPTOR(int, getsockopt, int sockfd, int level, int optname, void *optval,
int *optlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getsockopt, sockfd, level, optname, optval,
optlen);
if (optlen) COMMON_INTERCEPTOR_READ_RANGE(ctx, optlen, sizeof(*optlen));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getsockopt)(sockfd, level, optname, optval, optlen);
if (res == 0)
if (optval && optlen) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, optval, *optlen);
return res;
}
#define INIT_GETSOCKOPT COMMON_INTERCEPT_FUNCTION(getsockopt);
#else
#define INIT_GETSOCKOPT
#endif
#if SANITIZER_INTERCEPT_ACCEPT
INTERCEPTOR(int, accept, int fd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, accept, fd, addr, addrlen);
unsigned addrlen0 = 0;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addrlen0 = *addrlen;
}
int fd2 = REAL(accept)(fd, addr, addrlen);
if (fd2 >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
if (addr && addrlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(*addrlen, addrlen0));
}
return fd2;
}
#define INIT_ACCEPT COMMON_INTERCEPT_FUNCTION(accept);
#else
#define INIT_ACCEPT
#endif
#if SANITIZER_INTERCEPT_ACCEPT4
INTERCEPTOR(int, accept4, int fd, void *addr, unsigned *addrlen, int f) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, accept4, fd, addr, addrlen, f);
unsigned addrlen0 = 0;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addrlen0 = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int fd2 = REAL(accept4)(fd, addr, addrlen, f);
if (fd2 >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
if (addr && addrlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(*addrlen, addrlen0));
}
return fd2;
}
#define INIT_ACCEPT4 COMMON_INTERCEPT_FUNCTION(accept4);
#else
#define INIT_ACCEPT4
#endif
#if SANITIZER_INTERCEPT_PACCEPT
INTERCEPTOR(int, paccept, int fd, void *addr, unsigned *addrlen,
__sanitizer_sigset_t *set, int f) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, paccept, fd, addr, addrlen, set, f);
unsigned addrlen0 = 0;
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addrlen0 = *addrlen;
}
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
int fd2 = REAL(paccept)(fd, addr, addrlen, set, f);
if (fd2 >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, fd2);
if (addr && addrlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(*addrlen, addrlen0));
}
return fd2;
}
#define INIT_PACCEPT COMMON_INTERCEPT_FUNCTION(paccept);
#else
#define INIT_PACCEPT
#endif
#if SANITIZER_INTERCEPT_MODF
INTERCEPTOR(double, modf, double x, double *iptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, modf, x, iptr);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
double res = REAL(modf)(x, iptr);
if (iptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
}
return res;
}
INTERCEPTOR(float, modff, float x, float *iptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, modff, x, iptr);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
float res = REAL(modff)(x, iptr);
if (iptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
}
return res;
}
INTERCEPTOR(long double, modfl, long double x, long double *iptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, modfl, x, iptr);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
long double res = REAL(modfl)(x, iptr);
if (iptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iptr, sizeof(*iptr));
}
return res;
}
#define INIT_MODF \
COMMON_INTERCEPT_FUNCTION(modf); \
COMMON_INTERCEPT_FUNCTION(modff); \
COMMON_INTERCEPT_FUNCTION_LDBL(modfl);
#else
#define INIT_MODF
#endif
#if SANITIZER_INTERCEPT_RECVMSG || SANITIZER_INTERCEPT_RECVMMSG
static void write_msghdr(void *ctx, struct __sanitizer_msghdr *msg,
SSIZE_T maxlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg, sizeof(*msg));
if (msg->msg_name && msg->msg_namelen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg->msg_name, msg->msg_namelen);
if (msg->msg_iov && msg->msg_iovlen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg->msg_iov,
sizeof(*msg->msg_iov) * msg->msg_iovlen);
write_iovec(ctx, msg->msg_iov, msg->msg_iovlen, maxlen);
if (msg->msg_control && msg->msg_controllen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msg->msg_control, msg->msg_controllen);
}
#endif
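// write_msghdr() above receives the byte count the kernel actually
// delivered (recvmsg's return value) as maxlen, so write_iovec() unpoisons
// only the filled prefix of the scatter/gather buffers rather than each
// element's full iov_len capacity.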
#if SANITIZER_INTERCEPT_RECVMSG
INTERCEPTOR(SSIZE_T, recvmsg, int fd, struct __sanitizer_msghdr *msg,
int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recvmsg, fd, msg, flags);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(recvmsg)(fd, msg, flags);
if (res >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
if (msg) {
write_msghdr(ctx, msg, res);
COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg);
}
}
return res;
}
#define INIT_RECVMSG COMMON_INTERCEPT_FUNCTION(recvmsg);
#else
#define INIT_RECVMSG
#endif
#if SANITIZER_INTERCEPT_RECVMMSG
INTERCEPTOR(int, recvmmsg, int fd, struct __sanitizer_mmsghdr *msgvec,
unsigned int vlen, int flags, void *timeout) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recvmmsg, fd, msgvec, vlen, flags, timeout);
if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
int res = REAL(recvmmsg)(fd, msgvec, vlen, flags, timeout);
if (res >= 0) {
if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
for (int i = 0; i < res; ++i) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &msgvec[i].msg_len,
sizeof(msgvec[i].msg_len));
write_msghdr(ctx, &msgvec[i].msg_hdr, msgvec[i].msg_len);
COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, &msgvec[i].msg_hdr);
}
}
return res;
}
#define INIT_RECVMMSG COMMON_INTERCEPT_FUNCTION(recvmmsg);
#else
#define INIT_RECVMMSG
#endif
#if SANITIZER_INTERCEPT_SENDMSG || SANITIZER_INTERCEPT_SENDMMSG
static void read_msghdr_control(void *ctx, void *control, uptr controllen) {
const unsigned kCmsgDataOffset =
RoundUpTo(sizeof(__sanitizer_cmsghdr), sizeof(uptr));
char *p = (char *)control;
char *const control_end = p + controllen;
while (true) {
if (p + sizeof(__sanitizer_cmsghdr) > control_end) break;
__sanitizer_cmsghdr *cmsg = (__sanitizer_cmsghdr *)p;
COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_len, sizeof(cmsg->cmsg_len));
if (p + RoundUpTo(cmsg->cmsg_len, sizeof(uptr)) > control_end) break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_level,
sizeof(cmsg->cmsg_level));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &cmsg->cmsg_type,
sizeof(cmsg->cmsg_type));
if (cmsg->cmsg_len > kCmsgDataOffset) {
char *data = p + kCmsgDataOffset;
unsigned data_len = cmsg->cmsg_len - kCmsgDataOffset;
if (data_len > 0) COMMON_INTERCEPTOR_READ_RANGE(ctx, data, data_len);
}
p += RoundUpTo(cmsg->cmsg_len, sizeof(uptr));
}
}
static void read_msghdr(void *ctx, struct __sanitizer_msghdr *msg,
SSIZE_T maxlen) {
#define R(f) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, &msg->msg_##f, sizeof(msg->msg_##f))
R(name);
R(namelen);
R(iov);
R(iovlen);
R(control);
R(controllen);
R(flags);
#undef R
if (msg->msg_name && msg->msg_namelen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, msg->msg_name, msg->msg_namelen);
if (msg->msg_iov && msg->msg_iovlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, msg->msg_iov,
sizeof(*msg->msg_iov) * msg->msg_iovlen);
read_iovec(ctx, msg->msg_iov, msg->msg_iovlen, maxlen);
if (msg->msg_control && msg->msg_controllen)
read_msghdr_control(ctx, msg->msg_control, msg->msg_controllen);
}
#endif
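// read_msghdr_control() above follows the same layout contract as
// CMSG_FIRSTHDR/CMSG_NXTHDR: control data is a sequence of
// __sanitizer_cmsghdr records, each padded to word (uptr) alignment, so the
// cursor advances by RoundUpTo(cmsg_len, sizeof(uptr)) and stops as soon as
// a header or a whole record would overrun control_end. Header fields and
// the cmsg_len - kCmsgDataOffset payload bytes are checked as separate
// reads, which pinpoints which field of a bad message is uninitialized.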
#if SANITIZER_INTERCEPT_SENDMSG
INTERCEPTOR(SSIZE_T, sendmsg, int fd, struct __sanitizer_msghdr *msg,
int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sendmsg, fd, msg, flags);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
SSIZE_T res = REAL(sendmsg)(fd, msg, flags);
if (common_flags()->intercept_send && res >= 0 && msg)
read_msghdr(ctx, msg, res);
return res;
}
#define INIT_SENDMSG COMMON_INTERCEPT_FUNCTION(sendmsg);
#else
#define INIT_SENDMSG
#endif
#if SANITIZER_INTERCEPT_SENDMMSG
INTERCEPTOR(int, sendmmsg, int fd, struct __sanitizer_mmsghdr *msgvec,
unsigned vlen, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sendmmsg, fd, msgvec, vlen, flags);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
int res = REAL(sendmmsg)(fd, msgvec, vlen, flags);
if (res >= 0 && msgvec) {
for (int i = 0; i < res; ++i) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &msgvec[i].msg_len,
sizeof(msgvec[i].msg_len));
if (common_flags()->intercept_send)
read_msghdr(ctx, &msgvec[i].msg_hdr, msgvec[i].msg_len);
}
}
return res;
}
#define INIT_SENDMMSG COMMON_INTERCEPT_FUNCTION(sendmmsg);
#else
#define INIT_SENDMMSG
#endif
#if SANITIZER_INTERCEPT_SYSMSG
INTERCEPTOR(int, msgsnd, int msqid, const void *msgp, SIZE_T msgsz,
int msgflg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, msgsnd, msqid, msgp, msgsz, msgflg);
if (msgp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, msgp, sizeof(long) + msgsz);
int res = REAL(msgsnd)(msqid, msgp, msgsz, msgflg);
return res;
}
INTERCEPTOR(SSIZE_T, msgrcv, int msqid, void *msgp, SIZE_T msgsz,
long msgtyp, int msgflg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, msgrcv, msqid, msgp, msgsz, msgtyp, msgflg);
SSIZE_T len = REAL(msgrcv)(msqid, msgp, msgsz, msgtyp, msgflg);
if (len != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, msgp, sizeof(long) + len);
return len;
}
#define INIT_SYSMSG \
COMMON_INTERCEPT_FUNCTION(msgsnd); \
COMMON_INTERCEPT_FUNCTION(msgrcv);
#else
#define INIT_SYSMSG
#endif
#if SANITIZER_INTERCEPT_GETPEERNAME
INTERCEPTOR(int, getpeername, int sockfd, void *addr, unsigned *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpeername, sockfd, addr, addrlen);
unsigned addr_sz = 0;  // Set only when addrlen is non-null below.
if (addrlen) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, addrlen, sizeof(*addrlen));
addr_sz = *addrlen;
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getpeername)(sockfd, addr, addrlen);
if (!res && addr && addrlen) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, Min(addr_sz, *addrlen));
}
return res;
}
#define INIT_GETPEERNAME COMMON_INTERCEPT_FUNCTION(getpeername);
#else
#define INIT_GETPEERNAME
#endif
#if SANITIZER_INTERCEPT_SYSINFO
INTERCEPTOR(int, sysinfo, void *info) {
void *ctx;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
COMMON_INTERCEPTOR_ENTER(ctx, sysinfo, info);
int res = REAL(sysinfo)(info);
if (!res && info)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, struct_sysinfo_sz);
return res;
}
#define INIT_SYSINFO COMMON_INTERCEPT_FUNCTION(sysinfo);
#else
#define INIT_SYSINFO
#endif
#if SANITIZER_INTERCEPT_READDIR
INTERCEPTOR(__sanitizer_dirent *, opendir, const char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, opendir, path);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
__sanitizer_dirent *res = REAL(opendir)(path);
if (res)
COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path);
return res;
}
INTERCEPTOR(__sanitizer_dirent *, readdir, void *dirp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir, dirp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_dirent *res = REAL(readdir)(dirp);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, res->d_reclen);
return res;
}
INTERCEPTOR(int, readdir_r, void *dirp, __sanitizer_dirent *entry,
__sanitizer_dirent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir_r, dirp, entry, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(readdir_r)(dirp, entry, result);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (*result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *result, (*result)->d_reclen);
}
return res;
}
#define INIT_READDIR \
COMMON_INTERCEPT_FUNCTION(opendir); \
COMMON_INTERCEPT_FUNCTION(readdir); \
COMMON_INTERCEPT_FUNCTION(readdir_r);
#else
#define INIT_READDIR
#endif
#if SANITIZER_INTERCEPT_READDIR64
INTERCEPTOR(__sanitizer_dirent64 *, readdir64, void *dirp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir64, dirp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_dirent64 *res = REAL(readdir64)(dirp);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, res->d_reclen);
return res;
}
INTERCEPTOR(int, readdir64_r, void *dirp, __sanitizer_dirent64 *entry,
__sanitizer_dirent64 **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readdir64_r, dirp, entry, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(readdir64_r)(dirp, entry, result);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
if (*result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *result, (*result)->d_reclen);
}
return res;
}
#define INIT_READDIR64 \
COMMON_INTERCEPT_FUNCTION(readdir64); \
COMMON_INTERCEPT_FUNCTION(readdir64_r);
#else
#define INIT_READDIR64
#endif
#if SANITIZER_INTERCEPT_PTRACE
INTERCEPTOR(uptr, ptrace, int request, int pid, void *addr, void *data) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ptrace, request, pid, addr, data);
__sanitizer_iovec local_iovec;
if (data) {
if (request == ptrace_setregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_regs_struct_sz);
} else if (request == ptrace_setfpregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_fpregs_struct_sz);
} else if (request == ptrace_setfpxregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
} else if (request == ptrace_setvfpregs) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
} else if (request == ptrace_setsiginfo) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, siginfo_t_sz);
// Some kernels may zero iovec::iov_base on an invalid write access.
// In that case, copy the (possibly invalid) address here for later
// inspection.
} else if (request == ptrace_setregset || request == ptrace_getregset) {
__sanitizer_iovec *iovec = (__sanitizer_iovec*)data;
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec, sizeof(*iovec));
local_iovec = *iovec;
if (request == ptrace_setregset)
COMMON_INTERCEPTOR_READ_RANGE(ctx, iovec->iov_base, iovec->iov_len);
}
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
uptr res = REAL(ptrace)(request, pid, addr, data);
if (!res && data) {
// Note that PEEK* requests assign different meaning to the return value.
// This function does not handle them (nor does it need to).
if (request == ptrace_getregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_regs_struct_sz);
} else if (request == ptrace_getfpregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpregs_struct_sz);
} else if (request == ptrace_getfpxregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_fpxregs_struct_sz);
} else if (request == ptrace_getvfpregs) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, struct_user_vfpregs_struct_sz);
} else if (request == ptrace_getsiginfo) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, siginfo_t_sz);
} else if (request == ptrace_geteventmsg) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(unsigned long));
} else if (request == ptrace_getregset) {
__sanitizer_iovec *iovec = (__sanitizer_iovec*)data;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, iovec, sizeof(*iovec));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, local_iovec.iov_base,
local_iovec.iov_len);
}
}
return res;
}
#define INIT_PTRACE COMMON_INTERCEPT_FUNCTION(ptrace);
#else
#define INIT_PTRACE
#endif
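// For PTRACE_GETREGSET/SETREGSET above, data points at an iovec that the
// kernel may modify (and, as noted, iov_base may even be zeroed on a bad
// write access), so local_iovec snapshots the original base/length pair
// before the call; the post-call unpoisoning then uses the snapshot rather
// than the possibly-altered user copy.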
#if SANITIZER_INTERCEPT_SETLOCALE
static void unpoison_ctype_arrays(void *ctx) {
#if SANITIZER_NETBSD
// These arrays contain 256 regular elements (the unsigned char range) plus 1 EOF entry.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _ctype_tab_, 257 * sizeof(short));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _toupper_tab_, 257 * sizeof(short));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, _tolower_tab_, 257 * sizeof(short));
#endif
}
INTERCEPTOR(char *, setlocale, int category, char *locale) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setlocale, category, locale);
if (locale)
COMMON_INTERCEPTOR_READ_RANGE(ctx, locale, internal_strlen(locale) + 1);
char *res = REAL(setlocale)(category, locale);
if (res) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
unpoison_ctype_arrays(ctx);
}
return res;
}
#define INIT_SETLOCALE COMMON_INTERCEPT_FUNCTION(setlocale);
#else
#define INIT_SETLOCALE
#endif
#if SANITIZER_INTERCEPT_GETCWD
INTERCEPTOR(char *, getcwd, char *buf, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getcwd, buf, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(getcwd)(buf, size);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GETCWD COMMON_INTERCEPT_FUNCTION(getcwd);
#else
#define INIT_GETCWD
#endif
#if SANITIZER_INTERCEPT_GET_CURRENT_DIR_NAME
INTERCEPTOR(char *, get_current_dir_name, int fake) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, get_current_dir_name, fake);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(get_current_dir_name)(fake);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GET_CURRENT_DIR_NAME \
COMMON_INTERCEPT_FUNCTION(get_current_dir_name);
#else
#define INIT_GET_CURRENT_DIR_NAME
#endif
UNUSED static inline void FixRealStrtolEndptr(const char *nptr, char **endptr) {
CHECK(endptr);
if (nptr == *endptr) {
// No digits were found by the strtol call, so we need to determine the
// last symbol it accessed on our own. We find it by skipping leading
// blanks and an optional +/- sign.
while (IsSpace(*nptr)) nptr++;
if (*nptr == '+' || *nptr == '-') nptr++;
*endptr = const_cast<char *>(nptr);
}
CHECK(*endptr >= nptr);
}
UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr,
char **endptr, char *real_endptr, int base) {
if (endptr) {
*endptr = real_endptr;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, endptr, sizeof(*endptr));
}
// If base has an unsupported value, strtol can fail with EINVAL without
// reading any characters, so do the additional checks only if base is
// valid.
bool is_valid_base = (base == 0) || (2 <= base && base <= 36);
if (is_valid_base) {
FixRealStrtolEndptr(nptr, &real_endptr);
}
COMMON_INTERCEPTOR_READ_STRING(ctx, nptr, is_valid_base ?
(real_endptr - nptr) + 1 : 0);
}
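// Example of what FixRealStrtolEndptr() corrects for (illustrative input):
// after strtol("  +xyz", &end, 10) libc reports no digits and leaves end at
// the start of the string, yet it really did read the blanks and the sign.
// The fixup advances end past "  +" so COMMON_INTERCEPTOR_READ_STRING
// checks exactly the bytes the real call touched, no more and no fewer.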
#if SANITIZER_INTERCEPT_STRTOIMAX
INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *real_endptr;
INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
}
INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *real_endptr;
UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
return res;
}
#define INIT_STRTOIMAX \
COMMON_INTERCEPT_FUNCTION(strtoimax); \
COMMON_INTERCEPT_FUNCTION(strtoumax);
#else
#define INIT_STRTOIMAX
#endif
#if SANITIZER_INTERCEPT_MBSTOWCS
INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mbstowcs, dest, src, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(mbstowcs)(dest, src, len);
if (res != (SIZE_T)-1 && dest) {
SIZE_T write_cnt = res + (res < len);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt * sizeof(wchar_t));
}
return res;
}
INTERCEPTOR(SIZE_T, mbsrtowcs, wchar_t *dest, const char **src, SIZE_T len,
void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mbsrtowcs, dest, src, len, ps);
if (src) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(mbsrtowcs)(dest, src, len, ps);
if (res != (SIZE_T)(-1) && dest && src) {
// This function, and several others, may or may not write the terminating
// \0 character. They write it iff they clear *src.
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt * sizeof(wchar_t));
}
return res;
}
#define INIT_MBSTOWCS \
COMMON_INTERCEPT_FUNCTION(mbstowcs); \
COMMON_INTERCEPT_FUNCTION(mbsrtowcs);
#else
#define INIT_MBSTOWCS
#endif
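// In the conversions above, write_cnt = res + (res < len) accounts for the
// terminating L'\0': mbstowcs() appends it only when conversion finishes
// before all len slots are used, i.e. exactly when res < len. For the
// restartable variants the terminator is written iff the whole input was
// consumed, which is exactly when they clear *src, hence res + !*src.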
#if SANITIZER_INTERCEPT_MBSNRTOWCS
INTERCEPTOR(SIZE_T, mbsnrtowcs, wchar_t *dest, const char **src, SIZE_T nms,
SIZE_T len, void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mbsnrtowcs, dest, src, nms, len, ps);
if (src) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (nms) COMMON_INTERCEPTOR_READ_RANGE(ctx, *src, nms);
}
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(mbsnrtowcs)(dest, src, nms, len, ps);
if (res != (SIZE_T)(-1) && dest && src) {
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt * sizeof(wchar_t));
}
return res;
}
#define INIT_MBSNRTOWCS COMMON_INTERCEPT_FUNCTION(mbsnrtowcs);
#else
#define INIT_MBSNRTOWCS
#endif
#if SANITIZER_INTERCEPT_WCSTOMBS
INTERCEPTOR(SIZE_T, wcstombs, char *dest, const wchar_t *src, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcstombs, dest, src, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(wcstombs)(dest, src, len);
if (res != (SIZE_T)-1 && dest) {
SIZE_T write_cnt = res + (res < len);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt);
}
return res;
}
INTERCEPTOR(SIZE_T, wcsrtombs, char *dest, const wchar_t **src, SIZE_T len,
void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsrtombs, dest, src, len, ps);
if (src) COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(wcsrtombs)(dest, src, len, ps);
if (res != (SIZE_T)-1 && dest && src) {
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt);
}
return res;
}
#define INIT_WCSTOMBS \
COMMON_INTERCEPT_FUNCTION(wcstombs); \
COMMON_INTERCEPT_FUNCTION(wcsrtombs);
#else
#define INIT_WCSTOMBS
#endif
#if SANITIZER_INTERCEPT_WCSNRTOMBS
INTERCEPTOR(SIZE_T, wcsnrtombs, char *dest, const wchar_t **src, SIZE_T nms,
SIZE_T len, void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsnrtombs, dest, src, nms, len, ps);
if (src) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
if (nms) COMMON_INTERCEPTOR_READ_RANGE(ctx, *src, nms);
}
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(wcsnrtombs)(dest, src, nms, len, ps);
if (res != ((SIZE_T)-1) && dest && src) {
SIZE_T write_cnt = res + !*src;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, write_cnt);
}
return res;
}
#define INIT_WCSNRTOMBS COMMON_INTERCEPT_FUNCTION(wcsnrtombs);
#else
#define INIT_WCSNRTOMBS
#endif
#if SANITIZER_INTERCEPT_WCRTOMB
INTERCEPTOR(SIZE_T, wcrtomb, char *dest, wchar_t src, void *ps) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcrtomb, dest, src, ps);
if (ps) COMMON_INTERCEPTOR_READ_RANGE(ctx, ps, mbstate_t_sz);
if (!dest)
return REAL(wcrtomb)(dest, src, ps);
char local_dest[32];
SIZE_T res = REAL(wcrtomb)(local_dest, src, ps);
if (res != ((SIZE_T)-1)) {
CHECK_LE(res, sizeof(local_dest));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, res);
REAL(memcpy)(dest, local_dest, res);
}
return res;
}
#define INIT_WCRTOMB COMMON_INTERCEPT_FUNCTION(wcrtomb);
#else
#define INIT_WCRTOMB
#endif
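// wcrtomb() above is routed through a local scratch buffer so that the
// real function never writes into user memory directly; only the res bytes
// it actually produced are then marked written and copied out. The 32-byte
// scratch comfortably exceeds MB_LEN_MAX on supported platforms, and
// CHECK_LE enforces that assumption at runtime.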
#if SANITIZER_INTERCEPT_WCTOMB
INTERCEPTOR(int, wctomb, char *dest, wchar_t src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wctomb, dest, src);
if (!dest)
return REAL(wctomb)(dest, src);
char local_dest[32];
int res = REAL(wctomb)(local_dest, src);
if (res != -1) {
CHECK_LE(res, sizeof(local_dest));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, res);
REAL(memcpy)(dest, local_dest, res);
}
return res;
}
#define INIT_WCTOMB COMMON_INTERCEPT_FUNCTION(wctomb);
#else
#define INIT_WCTOMB
#endif
#if SANITIZER_INTERCEPT_TCGETATTR
INTERCEPTOR(int, tcgetattr, int fd, void *termios_p) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tcgetattr, fd, termios_p);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(tcgetattr)(fd, termios_p);
if (!res && termios_p)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, termios_p, struct_termios_sz);
return res;
}
#define INIT_TCGETATTR COMMON_INTERCEPT_FUNCTION(tcgetattr);
#else
#define INIT_TCGETATTR
#endif
#if SANITIZER_INTERCEPT_REALPATH
INTERCEPTOR(char *, realpath, const char *path, char *resolved_path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, realpath, path, resolved_path);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// Work around a bug in glibc where dlsym(RTLD_NEXT, ...) returns the oldest
// version of a versioned symbol. For realpath(), this gives us something
// (called __old_realpath) that does not handle NULL in the second argument.
// Handle it as part of the interceptor.
char *allocated_path = nullptr;
if (!resolved_path)
allocated_path = resolved_path = (char *)WRAP(malloc)(path_max + 1);
char *res = REAL(realpath)(path, resolved_path);
if (allocated_path && !res)
WRAP(free)(allocated_path);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
# define INIT_REALPATH COMMON_INTERCEPT_FUNCTION(realpath);
#else
#define INIT_REALPATH
#endif
#if SANITIZER_INTERCEPT_CANONICALIZE_FILE_NAME
INTERCEPTOR(char *, canonicalize_file_name, const char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, canonicalize_file_name, path);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
char *res = REAL(canonicalize_file_name)(path);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_CANONICALIZE_FILE_NAME \
COMMON_INTERCEPT_FUNCTION(canonicalize_file_name);
#else
#define INIT_CANONICALIZE_FILE_NAME
#endif
#if SANITIZER_INTERCEPT_CONFSTR
INTERCEPTOR(SIZE_T, confstr, int name, char *buf, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, confstr, name, buf, len);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(confstr)(name, buf, len);
if (buf && res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res < len ? res : len);
return res;
}
#define INIT_CONFSTR COMMON_INTERCEPT_FUNCTION(confstr);
#else
#define INIT_CONFSTR
#endif
#if SANITIZER_INTERCEPT_SCHED_GETAFFINITY
INTERCEPTOR(int, sched_getaffinity, int pid, SIZE_T cpusetsize, void *mask) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sched_getaffinity, pid, cpusetsize, mask);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sched_getaffinity)(pid, cpusetsize, mask);
if (mask && !res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mask, cpusetsize);
return res;
}
#define INIT_SCHED_GETAFFINITY COMMON_INTERCEPT_FUNCTION(sched_getaffinity);
#else
#define INIT_SCHED_GETAFFINITY
#endif
#if SANITIZER_INTERCEPT_SCHED_GETPARAM
INTERCEPTOR(int, sched_getparam, int pid, void *param) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sched_getparam, pid, param);
int res = REAL(sched_getparam)(pid, param);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, param, struct_sched_param_sz);
return res;
}
#define INIT_SCHED_GETPARAM COMMON_INTERCEPT_FUNCTION(sched_getparam);
#else
#define INIT_SCHED_GETPARAM
#endif
#if SANITIZER_INTERCEPT_STRERROR
INTERCEPTOR(char *, strerror, int errnum) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strerror, errnum);
COMMON_INTERCEPTOR_STRERROR();
char *res = REAL(strerror)(errnum);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_STRERROR COMMON_INTERCEPT_FUNCTION(strerror);
#else
#define INIT_STRERROR
#endif
#if SANITIZER_INTERCEPT_STRERROR_R
// There are two versions of strerror_r:
// * The POSIX version returns 0 on success and an error code on failure,
//   and writes the message to buf.
// * The GNU version returns a message pointer, which points either to buf
//   or to some static storage.
#if ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) && !_GNU_SOURCE) || \
SANITIZER_APPLE || SANITIZER_ANDROID || SANITIZER_NETBSD || \
SANITIZER_FREEBSD
// POSIX version. The spec is not clear on whether buf is null-terminated.
// At least on macOS, buf contents are valid even when the call fails.
INTERCEPTOR(int, strerror_r, int errnum, char *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(strerror_r)(errnum, buf, buflen);
SIZE_T sz = internal_strnlen(buf, buflen);
if (sz < buflen) ++sz;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sz);
return res;
}
#else
// GNU version.
INTERCEPTOR(char *, strerror_r, int errnum, char *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strerror_r, errnum, buf, buflen);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(strerror_r)(errnum, buf, buflen);
if (res == buf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
else
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#endif // ((_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600) &&
// !_GNU_SOURCE) || SANITIZER_APPLE || SANITIZER_ANDROID ||
// SANITIZER_NETBSD || SANITIZER_FREEBSD
#define INIT_STRERROR_R COMMON_INTERCEPT_FUNCTION(strerror_r);
#else
#define INIT_STRERROR_R
#endif
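// Which variant a build gets follows the usual libc rules; e.g. with glibc,
// compiling with -D_GNU_SOURCE selects the char *-returning GNU strerror_r,
// while _POSIX_C_SOURCE >= 200112L without _GNU_SOURCE selects the
// int-returning POSIX one, matching the #if above.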
#if SANITIZER_INTERCEPT_XPG_STRERROR_R
INTERCEPTOR(int, __xpg_strerror_r, int errnum, char *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xpg_strerror_r, errnum, buf, buflen);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(__xpg_strerror_r)(errnum, buf, buflen);
// This version always returns a null-terminated string.
if (buf && buflen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
return res;
}
#define INIT_XPG_STRERROR_R COMMON_INTERCEPT_FUNCTION(__xpg_strerror_r);
#else
#define INIT_XPG_STRERROR_R
#endif
#if SANITIZER_INTERCEPT_SCANDIR
typedef int (*scandir_filter_f)(const struct __sanitizer_dirent *);
typedef int (*scandir_compar_f)(const struct __sanitizer_dirent **,
const struct __sanitizer_dirent **);
static THREADLOCAL scandir_filter_f scandir_filter;
static THREADLOCAL scandir_compar_f scandir_compar;
static int wrapped_scandir_filter(const struct __sanitizer_dirent *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(dir, dir->d_reclen);
return scandir_filter(dir);
}
static int wrapped_scandir_compar(const struct __sanitizer_dirent **a,
const struct __sanitizer_dirent **b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, sizeof(*a));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*a, (*a)->d_reclen);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, sizeof(*b));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*b, (*b)->d_reclen);
return scandir_compar(a, b);
}
INTERCEPTOR(int, scandir, char *dirp, __sanitizer_dirent ***namelist,
scandir_filter_f filter, scandir_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, scandir, dirp, namelist, filter, compar);
if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, internal_strlen(dirp) + 1);
scandir_filter = filter;
scandir_compar = compar;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(scandir)(dirp, namelist,
filter ? wrapped_scandir_filter : nullptr,
compar ? wrapped_scandir_compar : nullptr);
scandir_filter = nullptr;
scandir_compar = nullptr;
if (namelist && res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelist, sizeof(*namelist));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *namelist, sizeof(**namelist) * res);
for (int i = 0; i < res; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (*namelist)[i],
(*namelist)[i]->d_reclen);
}
return res;
}
#define INIT_SCANDIR COMMON_INTERCEPT_FUNCTION(scandir);
#else
#define INIT_SCANDIR
#endif
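// The scandir() interception above uses a trampoline pattern: the user's
// filter/compar callbacks are stashed in THREADLOCAL slots and replaced
// with wrapped_scandir_* shims, which unpoison the libc-allocated dirent
// arguments before tail-calling the stashed callback. THREADLOCAL storage
// keeps concurrent scandir() calls on different threads from clobbering
// each other's callback slots (though a callback that itself calls
// scandir() on the same thread would still clobber them).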
#if SANITIZER_INTERCEPT_SCANDIR64
typedef int (*scandir64_filter_f)(const struct __sanitizer_dirent64 *);
typedef int (*scandir64_compar_f)(const struct __sanitizer_dirent64 **,
const struct __sanitizer_dirent64 **);
static THREADLOCAL scandir64_filter_f scandir64_filter;
static THREADLOCAL scandir64_compar_f scandir64_compar;
static int wrapped_scandir64_filter(const struct __sanitizer_dirent64 *dir) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(dir, dir->d_reclen);
return scandir64_filter(dir);
}
static int wrapped_scandir64_compar(const struct __sanitizer_dirent64 **a,
const struct __sanitizer_dirent64 **b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, sizeof(*a));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*a, (*a)->d_reclen);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, sizeof(*b));
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*b, (*b)->d_reclen);
return scandir64_compar(a, b);
}
INTERCEPTOR(int, scandir64, char *dirp, __sanitizer_dirent64 ***namelist,
scandir64_filter_f filter, scandir64_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, scandir64, dirp, namelist, filter, compar);
if (dirp) COMMON_INTERCEPTOR_READ_RANGE(ctx, dirp, internal_strlen(dirp) + 1);
scandir64_filter = filter;
scandir64_compar = compar;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res =
REAL(scandir64)(dirp, namelist,
filter ? wrapped_scandir64_filter : nullptr,
compar ? wrapped_scandir64_compar : nullptr);
scandir64_filter = nullptr;
scandir64_compar = nullptr;
if (namelist && res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelist, sizeof(*namelist));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *namelist, sizeof(**namelist) * res);
for (int i = 0; i < res; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (*namelist)[i],
(*namelist)[i]->d_reclen);
}
return res;
}
#define INIT_SCANDIR64 COMMON_INTERCEPT_FUNCTION(scandir64);
#else
#define INIT_SCANDIR64
#endif
#if SANITIZER_INTERCEPT_GETGROUPS
INTERCEPTOR(int, getgroups, int size, u32 *lst) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getgroups, size, lst);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getgroups)(size, lst);
if (res >= 0 && lst && size > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lst, res * sizeof(*lst));
return res;
}
#define INIT_GETGROUPS COMMON_INTERCEPT_FUNCTION(getgroups);
#else
#define INIT_GETGROUPS
#endif
#if SANITIZER_INTERCEPT_POLL
static void read_pollfd(void *ctx, __sanitizer_pollfd *fds,
__sanitizer_nfds_t nfds) {
for (unsigned i = 0; i < nfds; ++i) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, &fds[i].fd, sizeof(fds[i].fd));
COMMON_INTERCEPTOR_READ_RANGE(ctx, &fds[i].events, sizeof(fds[i].events));
}
}
static void write_pollfd(void *ctx, __sanitizer_pollfd *fds,
__sanitizer_nfds_t nfds) {
for (unsigned i = 0; i < nfds; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &fds[i].revents,
sizeof(fds[i].revents));
}
INTERCEPTOR(int, poll, __sanitizer_pollfd *fds, __sanitizer_nfds_t nfds,
int timeout) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, poll, fds, nfds, timeout);
if (fds && nfds) read_pollfd(ctx, fds, nfds);
int res = COMMON_INTERCEPTOR_BLOCK_REAL(poll)(fds, nfds, timeout);
if (fds && nfds) write_pollfd(ctx, fds, nfds);
return res;
}
#define INIT_POLL COMMON_INTERCEPT_FUNCTION(poll);
#else
#define INIT_POLL
#endif
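// read_pollfd()/write_pollfd() above encode poll()'s in/out split: fd and
// events are caller-supplied inputs, checked as reads before the call,
// while revents is kernel-filled output, marked written afterwards for
// every entry regardless of how many descriptors became ready.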
#if SANITIZER_INTERCEPT_PPOLL
INTERCEPTOR(int, ppoll, __sanitizer_pollfd *fds, __sanitizer_nfds_t nfds,
void *timeout_ts, __sanitizer_sigset_t *sigmask) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ppoll, fds, nfds, timeout_ts, sigmask);
if (fds && nfds) read_pollfd(ctx, fds, nfds);
if (timeout_ts)
COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout_ts, struct_timespec_sz);
if (sigmask) COMMON_INTERCEPTOR_READ_RANGE(ctx, sigmask, sizeof(*sigmask));
int res =
COMMON_INTERCEPTOR_BLOCK_REAL(ppoll)(fds, nfds, timeout_ts, sigmask);
if (fds && nfds) write_pollfd(ctx, fds, nfds);
return res;
}
#define INIT_PPOLL COMMON_INTERCEPT_FUNCTION(ppoll);
#else
#define INIT_PPOLL
#endif
#if SANITIZER_INTERCEPT_WORDEXP
INTERCEPTOR(int, wordexp, char *s, __sanitizer_wordexp_t *p, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wordexp, s, p, flags);
if (s) COMMON_INTERCEPTOR_READ_RANGE(ctx, s, internal_strlen(s) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(wordexp)(s, p, flags);
if (!res && p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
uptr we_wordc =
((flags & wordexp_wrde_dooffs) ? p->we_offs : 0) + p->we_wordc;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->we_wordv,
sizeof(*p->we_wordv) * (we_wordc + 1));
for (uptr i = 0; i < we_wordc; ++i) {
char *w = p->we_wordv[i];
if (w) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, w, internal_strlen(w) + 1);
}
}
return res;
}
#define INIT_WORDEXP COMMON_INTERCEPT_FUNCTION(wordexp);
#else
#define INIT_WORDEXP
#endif
#if SANITIZER_INTERCEPT_SIGWAIT
INTERCEPTOR(int, sigwait, __sanitizer_sigset_t *set, int *sig) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigwait, set, sig);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sigwait)(set, sig);
if (!res && sig) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sig, sizeof(*sig));
return res;
}
#define INIT_SIGWAIT COMMON_INTERCEPT_FUNCTION(sigwait);
#else
#define INIT_SIGWAIT
#endif
#if SANITIZER_INTERCEPT_SIGWAITINFO
INTERCEPTOR(int, sigwaitinfo, __sanitizer_sigset_t *set, void *info) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigwaitinfo, set, info);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sigwaitinfo)(set, info);
if (res > 0 && info) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, siginfo_t_sz);
return res;
}
#define INIT_SIGWAITINFO COMMON_INTERCEPT_FUNCTION(sigwaitinfo);
#else
#define INIT_SIGWAITINFO
#endif
#if SANITIZER_INTERCEPT_SIGTIMEDWAIT
INTERCEPTOR(int, sigtimedwait, __sanitizer_sigset_t *set, void *info,
void *timeout) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigtimedwait, set, info, timeout);
if (timeout) COMMON_INTERCEPTOR_READ_RANGE(ctx, timeout, struct_timespec_sz);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sigtimedwait)(set, info, timeout);
if (res > 0 && info) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, info, siginfo_t_sz);
return res;
}
#define INIT_SIGTIMEDWAIT COMMON_INTERCEPT_FUNCTION(sigtimedwait);
#else
#define INIT_SIGTIMEDWAIT
#endif
#if SANITIZER_INTERCEPT_SIGSETOPS
INTERCEPTOR(int, sigemptyset, __sanitizer_sigset_t *set) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigemptyset, set);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigemptyset)(set);
if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
return res;
}
INTERCEPTOR(int, sigfillset, __sanitizer_sigset_t *set) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigfillset, set);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigfillset)(set);
if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
return res;
}
#define INIT_SIGSETOPS \
COMMON_INTERCEPT_FUNCTION(sigemptyset); \
COMMON_INTERCEPT_FUNCTION(sigfillset);
#else
#define INIT_SIGSETOPS
#endif
#if SANITIZER_INTERCEPT_SIGSET_LOGICOPS
INTERCEPTOR(int, sigandset, __sanitizer_sigset_t *dst,
__sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigandset, dst, src1, src2);
if (src1)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1));
if (src2)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2));
int res = REAL(sigandset)(dst, src1, src2);
if (!res && dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return res;
}
INTERCEPTOR(int, sigorset, __sanitizer_sigset_t *dst,
__sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigorset, dst, src1, src2);
if (src1)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1));
if (src2)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2));
int res = REAL(sigorset)(dst, src1, src2);
if (!res && dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return res;
}
#define INIT_SIGSET_LOGICOPS \
COMMON_INTERCEPT_FUNCTION(sigandset); \
COMMON_INTERCEPT_FUNCTION(sigorset);
#else
#define INIT_SIGSET_LOGICOPS
#endif
#if SANITIZER_INTERCEPT_SIGPENDING
INTERCEPTOR(int, sigpending, __sanitizer_sigset_t *set) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigpending, set);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigpending)(set);
if (!res && set) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, set, sizeof(*set));
return res;
}
#define INIT_SIGPENDING COMMON_INTERCEPT_FUNCTION(sigpending);
#else
#define INIT_SIGPENDING
#endif
#if SANITIZER_INTERCEPT_SIGPROCMASK
INTERCEPTOR(int, sigprocmask, int how, __sanitizer_sigset_t *set,
__sanitizer_sigset_t *oldset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigprocmask, how, set, oldset);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(sigprocmask)(how, set, oldset);
if (!res && oldset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
return res;
}
#define INIT_SIGPROCMASK COMMON_INTERCEPT_FUNCTION(sigprocmask);
#else
#define INIT_SIGPROCMASK
#endif
#if SANITIZER_INTERCEPT_PTHREAD_SIGMASK
INTERCEPTOR(int, pthread_sigmask, int how, __sanitizer_sigset_t *set,
__sanitizer_sigset_t *oldset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_sigmask, how, set, oldset);
if (set) COMMON_INTERCEPTOR_READ_RANGE(ctx, set, sizeof(*set));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_sigmask)(how, set, oldset);
if (!res && oldset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldset, sizeof(*oldset));
return res;
}
#define INIT_PTHREAD_SIGMASK COMMON_INTERCEPT_FUNCTION(pthread_sigmask);
#else
#define INIT_PTHREAD_SIGMASK
#endif
#if SANITIZER_INTERCEPT_BACKTRACE
INTERCEPTOR(int, backtrace, void **buffer, int size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, backtrace, buffer, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(backtrace)(buffer, size);
if (res && buffer)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buffer, res * sizeof(*buffer));
return res;
}
INTERCEPTOR(char **, backtrace_symbols, void **buffer, int size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, backtrace_symbols, buffer, size);
if (buffer && size)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, size * sizeof(*buffer));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char **res = REAL(backtrace_symbols)(buffer, size);
if (res && size) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, size * sizeof(*res));
for (int i = 0; i < size; ++i)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res[i], internal_strlen(res[i]) + 1);
}
return res;
}
#define INIT_BACKTRACE \
COMMON_INTERCEPT_FUNCTION(backtrace); \
COMMON_INTERCEPT_FUNCTION(backtrace_symbols);
#else
#define INIT_BACKTRACE
#endif
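// A hypothetical caller, for illustration of the ranges unpoisoned above:
//
//   #include <execinfo.h>
//   #include <stdio.h>
//   #include <stdlib.h>
//   void dump_stack(void) {
//     void *pcs[64];
//     int n = backtrace(pcs, 64);  // pcs[0..n) is marked written
//     char **syms = backtrace_symbols(pcs, n);
//     if (syms) {
//       for (int i = 0; i < n; ++i)
//         puts(syms[i]);  // the array and each string are marked written
//       free(syms);
//     }
//   }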
#if SANITIZER_INTERCEPT__EXIT
INTERCEPTOR(void, _exit, int status) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _exit, status);
COMMON_INTERCEPTOR_USER_CALLBACK_START();
int status1 = COMMON_INTERCEPTOR_ON_EXIT(ctx);
COMMON_INTERCEPTOR_USER_CALLBACK_END();
if (status == 0) status = status1;
REAL(_exit)(status);
}
#define INIT__EXIT COMMON_INTERCEPT_FUNCTION(_exit);
#else
#define INIT__EXIT
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEX
INTERCEPTOR(int, pthread_mutex_lock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_mutex_lock, m);
COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m);
int res = REAL(pthread_mutex_lock)(m);
if (res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m);
if (res == 0 || res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
INTERCEPTOR(int, pthread_mutex_unlock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_mutex_unlock, m);
COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m);
int res = REAL(pthread_mutex_unlock)(m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
#define INIT_PTHREAD_MUTEX_LOCK COMMON_INTERCEPT_FUNCTION(pthread_mutex_lock)
#define INIT_PTHREAD_MUTEX_UNLOCK \
COMMON_INTERCEPT_FUNCTION(pthread_mutex_unlock)
#else
#define INIT_PTHREAD_MUTEX_LOCK
#define INIT_PTHREAD_MUTEX_UNLOCK
#endif
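// EOWNERDEAD (handled above) is returned by robust mutexes when the previous
// owner died while holding the lock; the lock is nevertheless acquired,
// which is why both MUTEX_REPAIR and MUTEX_POST_LOCK fire. A hypothetical
// caller, for illustration:
//
//   #include <errno.h>
//   #include <pthread.h>
//   int lock_robust(pthread_mutex_t *m) {
//     int rc = pthread_mutex_lock(m);
//     if (rc == EOWNERDEAD) {         // we own the lock; state may be stale
//       pthread_mutex_consistent(m);  // mark the mutex usable again
//       rc = 0;
//     }
//     return rc;
//   }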
#if SANITIZER_INTERCEPT___PTHREAD_MUTEX
INTERCEPTOR(int, __pthread_mutex_lock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __pthread_mutex_lock, m);
COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m);
int res = REAL(__pthread_mutex_lock)(m);
if (res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m);
if (res == 0 || res == errno_EOWNERDEAD)
COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
INTERCEPTOR(int, __pthread_mutex_unlock, void *m) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __pthread_mutex_unlock, m);
COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m);
int res = REAL(__pthread_mutex_unlock)(m);
if (res == errno_EINVAL)
COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m);
return res;
}
#define INIT___PTHREAD_MUTEX_LOCK \
COMMON_INTERCEPT_FUNCTION(__pthread_mutex_lock)
#define INIT___PTHREAD_MUTEX_UNLOCK \
COMMON_INTERCEPT_FUNCTION(__pthread_mutex_unlock)
#else
#define INIT___PTHREAD_MUTEX_LOCK
#define INIT___PTHREAD_MUTEX_UNLOCK
#endif
#if SANITIZER_INTERCEPT___LIBC_MUTEX
// On NetBSD, the __libc_* names are libc-internal entry points for the
// corresponding pthread functions, so alias them to the same wrappers.
INTERCEPTOR(int, __libc_mutex_lock, void *m)
ALIAS(WRAPPER_NAME(pthread_mutex_lock));
INTERCEPTOR(int, __libc_mutex_unlock, void *m)
ALIAS(WRAPPER_NAME(pthread_mutex_unlock));
INTERCEPTOR(int, __libc_thr_setcancelstate, int state, int *oldstate)
ALIAS(WRAPPER_NAME(pthread_setcancelstate));
#define INIT___LIBC_MUTEX_LOCK COMMON_INTERCEPT_FUNCTION(__libc_mutex_lock)
#define INIT___LIBC_MUTEX_UNLOCK COMMON_INTERCEPT_FUNCTION(__libc_mutex_unlock)
#define INIT___LIBC_THR_SETCANCELSTATE \
COMMON_INTERCEPT_FUNCTION(__libc_thr_setcancelstate)
#else
#define INIT___LIBC_MUTEX_LOCK
#define INIT___LIBC_MUTEX_UNLOCK
#define INIT___LIBC_THR_SETCANCELSTATE
#endif
#if SANITIZER_INTERCEPT_GETMNTENT || SANITIZER_INTERCEPT_GETMNTENT_R
static void write_mntent(void *ctx, __sanitizer_mntent *mnt) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt, sizeof(*mnt));
if (mnt->mnt_fsname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_fsname,
internal_strlen(mnt->mnt_fsname) + 1);
if (mnt->mnt_dir)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_dir,
internal_strlen(mnt->mnt_dir) + 1);
if (mnt->mnt_type)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_type,
internal_strlen(mnt->mnt_type) + 1);
if (mnt->mnt_opts)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mnt->mnt_opts,
internal_strlen(mnt->mnt_opts) + 1);
}
#endif
#if SANITIZER_INTERCEPT_GETMNTENT
INTERCEPTOR(__sanitizer_mntent *, getmntent, void *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getmntent, fp);
__sanitizer_mntent *res = REAL(getmntent)(fp);
if (res) write_mntent(ctx, res);
return res;
}
#define INIT_GETMNTENT COMMON_INTERCEPT_FUNCTION(getmntent);
#else
#define INIT_GETMNTENT
#endif
#if SANITIZER_INTERCEPT_GETMNTENT_R
INTERCEPTOR(__sanitizer_mntent *, getmntent_r, void *fp,
__sanitizer_mntent *mntbuf, char *buf, int buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getmntent_r, fp, mntbuf, buf, buflen);
__sanitizer_mntent *res = REAL(getmntent_r)(fp, mntbuf, buf, buflen);
if (res) write_mntent(ctx, res);
return res;
}
#define INIT_GETMNTENT_R COMMON_INTERCEPT_FUNCTION(getmntent_r);
#else
#define INIT_GETMNTENT_R
#endif
#if SANITIZER_INTERCEPT_STATFS
INTERCEPTOR(int, statfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statfs, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statfs)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs_sz);
return res;
}
INTERCEPTOR(int, fstatfs, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatfs, fd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatfs)(fd, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs_sz);
return res;
}
#define INIT_STATFS \
COMMON_INTERCEPT_FUNCTION(statfs); \
COMMON_INTERCEPT_FUNCTION(fstatfs);
#else
#define INIT_STATFS
#endif
#if SANITIZER_INTERCEPT_STATFS64
INTERCEPTOR(int, statfs64, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statfs64, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statfs64)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs64_sz);
return res;
}
INTERCEPTOR(int, fstatfs64, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatfs64, fd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatfs64)(fd, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statfs64_sz);
return res;
}
#define INIT_STATFS64 \
COMMON_INTERCEPT_FUNCTION(statfs64); \
COMMON_INTERCEPT_FUNCTION(fstatfs64);
#else
#define INIT_STATFS64
#endif
#if SANITIZER_INTERCEPT_STATVFS
INTERCEPTOR(int, statvfs, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statvfs)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
return res;
}
INTERCEPTOR(int, fstatvfs, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs, fd, buf);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatvfs)(fd, buf);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
if (fd >= 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return res;
}
#define INIT_STATVFS \
COMMON_INTERCEPT_FUNCTION(statvfs); \
COMMON_INTERCEPT_FUNCTION(fstatvfs);
#else
#define INIT_STATVFS
#endif
#if SANITIZER_INTERCEPT_STATVFS64
INTERCEPTOR(int, statvfs64, char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs64, path, buf);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(statvfs64)(path, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs64_sz);
return res;
}
INTERCEPTOR(int, fstatvfs64, int fd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs64, fd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(fstatvfs64)(fd, buf);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs64_sz);
return res;
}
#define INIT_STATVFS64 \
COMMON_INTERCEPT_FUNCTION(statvfs64); \
COMMON_INTERCEPT_FUNCTION(fstatvfs64);
#else
#define INIT_STATVFS64
#endif
#if SANITIZER_INTERCEPT_INITGROUPS
INTERCEPTOR(int, initgroups, char *user, u32 group) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, initgroups, user, group);
if (user) COMMON_INTERCEPTOR_READ_RANGE(ctx, user, internal_strlen(user) + 1);
int res = REAL(initgroups)(user, group);
return res;
}
#define INIT_INITGROUPS COMMON_INTERCEPT_FUNCTION(initgroups);
#else
#define INIT_INITGROUPS
#endif
#if SANITIZER_INTERCEPT_ETHER_NTOA_ATON
INTERCEPTOR(char *, ether_ntoa, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa, addr);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
char *res = REAL(ether_ntoa)(addr);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(__sanitizer_ether_addr *, ether_aton, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_aton, buf);
if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, internal_strlen(buf) + 1);
__sanitizer_ether_addr *res = REAL(ether_aton)(buf);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, sizeof(*res));
return res;
}
#define INIT_ETHER_NTOA_ATON \
COMMON_INTERCEPT_FUNCTION(ether_ntoa); \
COMMON_INTERCEPT_FUNCTION(ether_aton);
#else
#define INIT_ETHER_NTOA_ATON
#endif
#if SANITIZER_INTERCEPT_ETHER_HOST
INTERCEPTOR(int, ether_ntohost, char *hostname, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntohost, hostname, addr);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_ntohost)(hostname, addr);
if (!res && hostname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
return res;
}
INTERCEPTOR(int, ether_hostton, char *hostname, __sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_hostton, hostname, addr);
if (hostname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_hostton)(hostname, addr);
if (!res && addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
return res;
}
INTERCEPTOR(int, ether_line, char *line, __sanitizer_ether_addr *addr,
char *hostname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_line, line, addr, hostname);
if (line) COMMON_INTERCEPTOR_READ_RANGE(ctx, line, internal_strlen(line) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ether_line)(line, addr, hostname);
if (!res) {
if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
if (hostname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hostname, internal_strlen(hostname) + 1);
}
return res;
}
#define INIT_ETHER_HOST \
COMMON_INTERCEPT_FUNCTION(ether_ntohost); \
COMMON_INTERCEPT_FUNCTION(ether_hostton); \
COMMON_INTERCEPT_FUNCTION(ether_line);
#else
#define INIT_ETHER_HOST
#endif
#if SANITIZER_INTERCEPT_ETHER_R
INTERCEPTOR(char *, ether_ntoa_r, __sanitizer_ether_addr *addr, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_ntoa_r, addr, buf);
if (addr) COMMON_INTERCEPTOR_READ_RANGE(ctx, addr, sizeof(*addr));
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(ether_ntoa_r)(addr, buf);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(__sanitizer_ether_addr *, ether_aton_r, char *buf,
__sanitizer_ether_addr *addr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ether_aton_r, buf, addr);
if (buf) COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, internal_strlen(buf) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_ether_addr *res = REAL(ether_aton_r)(buf, addr);
if (res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, sizeof(*res));
return res;
}
#define INIT_ETHER_R \
COMMON_INTERCEPT_FUNCTION(ether_ntoa_r); \
COMMON_INTERCEPT_FUNCTION(ether_aton_r);
#else
#define INIT_ETHER_R
#endif
#if SANITIZER_INTERCEPT_SHMCTL
INTERCEPTOR(int, shmctl, int shmid, int cmd, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, shmctl, shmid, cmd, buf);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(shmctl)(shmid, cmd, buf);
if (res >= 0) {
unsigned sz = 0;
if (cmd == shmctl_ipc_stat || cmd == shmctl_shm_stat)
sz = sizeof(__sanitizer_shmid_ds);
else if (cmd == shmctl_ipc_info)
sz = struct_shminfo_sz;
else if (cmd == shmctl_shm_info)
sz = struct_shm_info_sz;
if (sz) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sz);
}
return res;
}
#define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION(shmctl);
#else
#define INIT_SHMCTL
#endif
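// The sizes above correspond to what the kernel writes back for each
// command. A hypothetical caller, for illustration:
//
//   #include <sys/shm.h>
//   int stat_segment(int shmid, struct shmid_ds *ds) {
//     // On success the interceptor marks sizeof(*ds) bytes as written,
//     // matching the shmctl_ipc_stat case above.
//     return shmctl(shmid, IPC_STAT, ds);
//   }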
#if SANITIZER_INTERCEPT_RANDOM_R
INTERCEPTOR(int, random_r, void *buf, u32 *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, random_r, buf, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(random_r)(buf, result);
if (!res && result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
#define INIT_RANDOM_R COMMON_INTERCEPT_FUNCTION(random_r);
#else
#define INIT_RANDOM_R
#endif
// FIXME: under ASan the REAL() call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_ATTR_GET_SCHED || \
SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED || \
SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GET || \
SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GET
#define INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(fn, sz) \
INTERCEPTOR(int, fn, void *attr, void *r) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, fn, attr, r); \
int res = REAL(fn)(attr, r); \
if (!res && r) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, r, sz); \
return res; \
}
#define INTERCEPTOR_PTHREAD_ATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_attr_get##what, sz)
#define INTERCEPTOR_PTHREAD_MUTEXATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_mutexattr_get##what, sz)
#define INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_rwlockattr_get##what, sz)
#define INTERCEPTOR_PTHREAD_CONDATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_condattr_get##what, sz)
#define INTERCEPTOR_PTHREAD_BARRIERATTR_GET(what, sz) \
INTERCEPTOR_PTHREAD_OBJECT_ATTR_GET(pthread_barrierattr_get##what, sz)
#endif
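// For example, INTERCEPTOR_PTHREAD_ATTR_GET(detachstate, sizeof(int)) below
// expands (roughly) to:
//
//   INTERCEPTOR(int, pthread_attr_getdetachstate, void *attr, void *r) {
//     void *ctx;
//     COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getdetachstate, attr, r);
//     int res = REAL(pthread_attr_getdetachstate)(attr, r);
//     if (!res && r) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, r, sizeof(int));
//     return res;
//   }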
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET
INTERCEPTOR_PTHREAD_ATTR_GET(detachstate, sizeof(int))
INTERCEPTOR_PTHREAD_ATTR_GET(guardsize, sizeof(SIZE_T))
INTERCEPTOR_PTHREAD_ATTR_GET(scope, sizeof(int))
INTERCEPTOR_PTHREAD_ATTR_GET(stacksize, sizeof(SIZE_T))
INTERCEPTOR(int, pthread_attr_getstack, void *attr, void **addr, SIZE_T *size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getstack, attr, addr, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_attr_getstack)(attr, addr, size);
if (!res) {
if (addr) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, addr, sizeof(*addr));
if (size) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, size, sizeof(*size));
}
return res;
}
// We may need to call the real pthread_attr_getstack from the run-time
// in sanitizer_common, but we don't want to include the interception
// headers there, so just define this function here.
namespace __sanitizer {
extern "C" {
int real_pthread_attr_getstack(void *attr, void **addr, SIZE_T *size) {
return REAL(pthread_attr_getstack)(attr, addr, size);
}
} // extern "C"
} // namespace __sanitizer
#define INIT_PTHREAD_ATTR_GET \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getdetachstate); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getguardsize); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getscope); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getstacksize); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getstack);
#else
#define INIT_PTHREAD_ATTR_GET
#endif
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GET_SCHED
INTERCEPTOR_PTHREAD_ATTR_GET(schedparam, struct_sched_param_sz)
INTERCEPTOR_PTHREAD_ATTR_GET(schedpolicy, sizeof(int))
#define INIT_PTHREAD_ATTR_GET_SCHED \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getschedparam); \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getschedpolicy);
#else
#define INIT_PTHREAD_ATTR_GET_SCHED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GETINHERITSCHED
INTERCEPTOR_PTHREAD_ATTR_GET(inheritsched, sizeof(int))
#define INIT_PTHREAD_ATTR_GETINHERITSCHED \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getinheritsched);
#else
#define INIT_PTHREAD_ATTR_GETINHERITSCHED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_ATTR_GETAFFINITY_NP
INTERCEPTOR(int, pthread_attr_getaffinity_np, void *attr, SIZE_T cpusetsize,
void *cpuset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_attr_getaffinity_np, attr, cpusetsize,
cpuset);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_attr_getaffinity_np)(attr, cpusetsize, cpuset);
if (!res && cpusetsize && cpuset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cpuset, cpusetsize);
return res;
}
#define INIT_PTHREAD_ATTR_GETAFFINITY_NP \
COMMON_INTERCEPT_FUNCTION(pthread_attr_getaffinity_np);
#else
#define INIT_PTHREAD_ATTR_GETAFFINITY_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_GETAFFINITY_NP
INTERCEPTOR(int, pthread_getaffinity_np, void *attr, SIZE_T cpusetsize,
void *cpuset) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getaffinity_np, attr, cpusetsize,
cpuset);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(pthread_getaffinity_np)(attr, cpusetsize, cpuset);
if (!res && cpusetsize && cpuset)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cpuset, cpusetsize);
return res;
}
#define INIT_PTHREAD_GETAFFINITY_NP \
COMMON_INTERCEPT_FUNCTION(pthread_getaffinity_np);
#else
#define INIT_PTHREAD_GETAFFINITY_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPSHARED
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(pshared, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getpshared);
#else
#define INIT_PTHREAD_MUTEXATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETTYPE
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(type, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETTYPE \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_gettype);
#else
#define INIT_PTHREAD_MUTEXATTR_GETTYPE
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPROTOCOL
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(protocol, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETPROTOCOL \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprotocol);
#else
#define INIT_PTHREAD_MUTEXATTR_GETPROTOCOL
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETPRIOCEILING
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(prioceiling, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getprioceiling);
#else
#define INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(robust, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETROBUST \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust);
#else
#define INIT_PTHREAD_MUTEXATTR_GETROBUST
#endif
#if SANITIZER_INTERCEPT_PTHREAD_MUTEXATTR_GETROBUST_NP
INTERCEPTOR_PTHREAD_MUTEXATTR_GET(robust_np, sizeof(int))
#define INIT_PTHREAD_MUTEXATTR_GETROBUST_NP \
COMMON_INTERCEPT_FUNCTION(pthread_mutexattr_getrobust_np);
#else
#define INIT_PTHREAD_MUTEXATTR_GETROBUST_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETPSHARED
INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(pshared, sizeof(int))
#define INIT_PTHREAD_RWLOCKATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getpshared);
#else
#define INIT_PTHREAD_RWLOCKATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_RWLOCKATTR_GETKIND_NP
INTERCEPTOR_PTHREAD_RWLOCKATTR_GET(kind_np, sizeof(int))
#define INIT_PTHREAD_RWLOCKATTR_GETKIND_NP \
COMMON_INTERCEPT_FUNCTION(pthread_rwlockattr_getkind_np);
#else
#define INIT_PTHREAD_RWLOCKATTR_GETKIND_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETPSHARED
INTERCEPTOR_PTHREAD_CONDATTR_GET(pshared, sizeof(int))
#define INIT_PTHREAD_CONDATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_condattr_getpshared);
#else
#define INIT_PTHREAD_CONDATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_PTHREAD_CONDATTR_GETCLOCK
INTERCEPTOR_PTHREAD_CONDATTR_GET(clock, sizeof(int))
#define INIT_PTHREAD_CONDATTR_GETCLOCK \
COMMON_INTERCEPT_FUNCTION(pthread_condattr_getclock);
#else
#define INIT_PTHREAD_CONDATTR_GETCLOCK
#endif
#if SANITIZER_INTERCEPT_PTHREAD_BARRIERATTR_GETPSHARED
INTERCEPTOR_PTHREAD_BARRIERATTR_GET(pshared, sizeof(int))  // not on Mac or Android
#define INIT_PTHREAD_BARRIERATTR_GETPSHARED \
COMMON_INTERCEPT_FUNCTION(pthread_barrierattr_getpshared);
#else
#define INIT_PTHREAD_BARRIERATTR_GETPSHARED
#endif
#if SANITIZER_INTERCEPT_TMPNAM
INTERCEPTOR(char *, tmpnam, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tmpnam, s);
char *res = REAL(tmpnam)(s);
if (res) {
if (s)
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
else
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_TMPNAM COMMON_INTERCEPT_FUNCTION(tmpnam);
#else
#define INIT_TMPNAM
#endif
#if SANITIZER_INTERCEPT_TMPNAM_R
INTERCEPTOR(char *, tmpnam_r, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tmpnam_r, s);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(tmpnam_r)(s);
if (res && s) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, s, internal_strlen(s) + 1);
return res;
}
#define INIT_TMPNAM_R COMMON_INTERCEPT_FUNCTION(tmpnam_r);
#else
#define INIT_TMPNAM_R
#endif
#if SANITIZER_INTERCEPT_PTSNAME
INTERCEPTOR(char *, ptsname, int fd) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ptsname, fd);
char *res = REAL(ptsname)(fd);
if (res != nullptr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_PTSNAME COMMON_INTERCEPT_FUNCTION(ptsname);
#else
#define INIT_PTSNAME
#endif
#if SANITIZER_INTERCEPT_PTSNAME_R
INTERCEPTOR(int, ptsname_r, int fd, char *name, SIZE_T namesize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ptsname_r, fd, name, namesize);
int res = REAL(ptsname_r)(fd, name, namesize);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return res;
}
#define INIT_PTSNAME_R COMMON_INTERCEPT_FUNCTION(ptsname_r);
#else
#define INIT_PTSNAME_R
#endif
#if SANITIZER_INTERCEPT_TTYNAME
INTERCEPTOR(char *, ttyname, int fd) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ttyname, fd);
char *res = REAL(ttyname)(fd);
if (res != nullptr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_TTYNAME COMMON_INTERCEPT_FUNCTION(ttyname);
#else
#define INIT_TTYNAME
#endif
#if SANITIZER_INTERCEPT_TTYNAME_R
INTERCEPTOR(int, ttyname_r, int fd, char *name, SIZE_T namesize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ttyname_r, fd, name, namesize);
int res = REAL(ttyname_r)(fd, name, namesize);
if (res == 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return res;
}
#define INIT_TTYNAME_R COMMON_INTERCEPT_FUNCTION(ttyname_r);
#else
#define INIT_TTYNAME_R
#endif
#if SANITIZER_INTERCEPT_TEMPNAM
INTERCEPTOR(char *, tempnam, char *dir, char *pfx) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tempnam, dir, pfx);
if (dir) COMMON_INTERCEPTOR_READ_RANGE(ctx, dir, internal_strlen(dir) + 1);
if (pfx) COMMON_INTERCEPTOR_READ_RANGE(ctx, pfx, internal_strlen(pfx) + 1);
char *res = REAL(tempnam)(dir, pfx);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_TEMPNAM COMMON_INTERCEPT_FUNCTION(tempnam);
#else
#define INIT_TEMPNAM
#endif
#if SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP && !SANITIZER_NETBSD
INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setname_np, thread, name);
COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name);
return REAL(pthread_setname_np)(thread, name);
}
#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
#elif SANITIZER_INTERCEPT_PTHREAD_SETNAME_NP && SANITIZER_NETBSD
INTERCEPTOR(int, pthread_setname_np, uptr thread, const char *name, void *arg) {
void *ctx;
char newname[32]; // PTHREAD_MAX_NAMELEN_NP=32
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setname_np, thread, name, arg);
COMMON_INTERCEPTOR_READ_STRING(ctx, name, 0);
// On NetBSD, pthread_setname_np() treats 'name' as a printf-like format
// string, with 'arg' as its argument.
internal_snprintf(newname, sizeof(newname), name, arg);
COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, newname);
return REAL(pthread_setname_np)(thread, name, arg);
}
#define INIT_PTHREAD_SETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_setname_np);
#else
#define INIT_PTHREAD_SETNAME_NP
#endif
#if SANITIZER_INTERCEPT_PTHREAD_GETNAME_NP
INTERCEPTOR(int, pthread_getname_np, uptr thread, char *name, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_getname_np, thread, name, len);
int res = REAL(pthread_getname_np)(thread, name, len);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strnlen(name, len) + 1);
return res;
}
#define INIT_PTHREAD_GETNAME_NP COMMON_INTERCEPT_FUNCTION(pthread_getname_np);
#else
#define INIT_PTHREAD_GETNAME_NP
#endif
#if SANITIZER_INTERCEPT_SINCOS
INTERCEPTOR(void, sincos, double x, double *sin, double *cos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sincos, x, sin, cos);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(sincos)(x, sin, cos);
if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
}
INTERCEPTOR(void, sincosf, float x, float *sin, float *cos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sincosf, x, sin, cos);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(sincosf)(x, sin, cos);
if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
}
INTERCEPTOR(void, sincosl, long double x, long double *sin, long double *cos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sincosl, x, sin, cos);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(sincosl)(x, sin, cos);
if (sin) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sin, sizeof(*sin));
if (cos) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cos, sizeof(*cos));
}
#define INIT_SINCOS \
COMMON_INTERCEPT_FUNCTION(sincos); \
COMMON_INTERCEPT_FUNCTION(sincosf); \
COMMON_INTERCEPT_FUNCTION_LDBL(sincosl);
#else
#define INIT_SINCOS
#endif
#if SANITIZER_INTERCEPT_REMQUO
INTERCEPTOR(double, remquo, double x, double y, int *quo) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, remquo, x, y, quo);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
double res = REAL(remquo)(x, y, quo);
if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
return res;
}
INTERCEPTOR(float, remquof, float x, float y, int *quo) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, remquof, x, y, quo);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
float res = REAL(remquof)(x, y, quo);
if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
return res;
}
#define INIT_REMQUO \
COMMON_INTERCEPT_FUNCTION(remquo); \
COMMON_INTERCEPT_FUNCTION(remquof);
#else
#define INIT_REMQUO
#endif
#if SANITIZER_INTERCEPT_REMQUOL
INTERCEPTOR(long double, remquol, long double x, long double y, int *quo) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, remquol, x, y, quo);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
long double res = REAL(remquol)(x, y, quo);
if (quo) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, quo, sizeof(*quo));
return res;
}
#define INIT_REMQUOL \
COMMON_INTERCEPT_FUNCTION_LDBL(remquol);
#else
#define INIT_REMQUOL
#endif
#if SANITIZER_INTERCEPT_LGAMMA
extern int signgam;
INTERCEPTOR(double, lgamma, double x) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgamma, x);
double res = REAL(lgamma)(x);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &signgam, sizeof(signgam));
return res;
}
INTERCEPTOR(float, lgammaf, float x) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammaf, x);
float res = REAL(lgammaf)(x);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &signgam, sizeof(signgam));
return res;
}
#define INIT_LGAMMA \
COMMON_INTERCEPT_FUNCTION(lgamma); \
COMMON_INTERCEPT_FUNCTION(lgammaf);
#else
#define INIT_LGAMMA
#endif
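// lgamma() and lgammaf() store the sign of Gamma(x) in the global signgam,
// which is why the interceptors mark it written. A hypothetical caller, for
// illustration:
//
//   #include <math.h>
//   double log_abs_gamma(double x, int *sign) {
//     double r = lgamma(x);
//     *sign = signgam;  // initialized thanks to the WRITE_RANGE above
//     return r;
//   }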
#if SANITIZER_INTERCEPT_LGAMMAL
INTERCEPTOR(long double, lgammal, long double x) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammal, x);
long double res = REAL(lgammal)(x);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, &signgam, sizeof(signgam));
return res;
}
#define INIT_LGAMMAL \
COMMON_INTERCEPT_FUNCTION_LDBL(lgammal);
#else
#define INIT_LGAMMAL
#endif
#if SANITIZER_INTERCEPT_LGAMMA_R
INTERCEPTOR(double, lgamma_r, double x, int *signp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgamma_r, x, signp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
double res = REAL(lgamma_r)(x, signp);
if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
return res;
}
INTERCEPTOR(float, lgammaf_r, float x, int *signp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammaf_r, x, signp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
float res = REAL(lgammaf_r)(x, signp);
if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
return res;
}
#define INIT_LGAMMA_R \
COMMON_INTERCEPT_FUNCTION(lgamma_r); \
COMMON_INTERCEPT_FUNCTION(lgammaf_r);
#else
#define INIT_LGAMMA_R
#endif
#if SANITIZER_INTERCEPT_LGAMMAL_R
INTERCEPTOR(long double, lgammal_r, long double x, int *signp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgammal_r, x, signp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
long double res = REAL(lgammal_r)(x, signp);
if (signp) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, signp, sizeof(*signp));
return res;
}
#define INIT_LGAMMAL_R COMMON_INTERCEPT_FUNCTION_LDBL(lgammal_r);
#else
#define INIT_LGAMMAL_R
#endif
#if SANITIZER_INTERCEPT_DRAND48_R
INTERCEPTOR(int, drand48_r, void *buffer, double *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, drand48_r, buffer, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(drand48_r)(buffer, result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
INTERCEPTOR(int, lrand48_r, void *buffer, long *result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lrand48_r, buffer, result);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(lrand48_r)(buffer, result);
if (result) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(*result));
return res;
}
#define INIT_DRAND48_R \
COMMON_INTERCEPT_FUNCTION(drand48_r); \
COMMON_INTERCEPT_FUNCTION(lrand48_r);
#else
#define INIT_DRAND48_R
#endif
#if SANITIZER_INTERCEPT_RAND_R
INTERCEPTOR(int, rand_r, unsigned *seedp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, rand_r, seedp);
COMMON_INTERCEPTOR_READ_RANGE(ctx, seedp, sizeof(*seedp));
return REAL(rand_r)(seedp);
}
#define INIT_RAND_R COMMON_INTERCEPT_FUNCTION(rand_r);
#else
#define INIT_RAND_R
#endif
#if SANITIZER_INTERCEPT_GETLINE
INTERCEPTOR(SSIZE_T, getline, char **lineptr, SIZE_T *n, void *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getline, lineptr, n, stream);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(getline)(lineptr, n, stream);
if (res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *lineptr, res + 1);
}
return res;
}
// FIXME: under ASan the call below may write to freed memory and corrupt its
// metadata. See
// https://github.com/google/sanitizers/issues/321.
#define GETDELIM_INTERCEPTOR_IMPL(vname) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, vname, lineptr, n, delim, stream); \
SSIZE_T res = REAL(vname)(lineptr, n, delim, stream); \
if (res > 0) { \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineptr, sizeof(*lineptr)); \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n)); \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *lineptr, res + 1); \
} \
return res; \
}
INTERCEPTOR(SSIZE_T, __getdelim, char **lineptr, SIZE_T *n, int delim,
void *stream)
GETDELIM_INTERCEPTOR_IMPL(__getdelim)
// There's no __getdelim() on FreeBSD, so we supply the getdelim() interceptor
// with its own body.
INTERCEPTOR(SSIZE_T, getdelim, char **lineptr, SIZE_T *n, int delim,
void *stream)
GETDELIM_INTERCEPTOR_IMPL(getdelim)
#define INIT_GETLINE \
COMMON_INTERCEPT_FUNCTION(getline); \
COMMON_INTERCEPT_FUNCTION(__getdelim); \
COMMON_INTERCEPT_FUNCTION(getdelim);
#else
#define INIT_GETLINE
#endif
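// The canonical getline() loop that the unpoisoning above is meant to cover
// (hypothetical user code, for illustration):
//
//   #include <stdio.h>
//   #include <stdlib.h>
//   void print_lines(FILE *f) {
//     char *line = NULL;
//     size_t cap = 0;
//     ssize_t len;
//     while ((len = getline(&line, &cap, f)) != -1) {
//       // line, cap and the len + 1 bytes at *line (including the NUL)
//       // were all marked written by the interceptor.
//       fwrite(line, 1, (size_t)len, stdout);
//     }
//     free(line);
//   }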
#if SANITIZER_INTERCEPT_ICONV
INTERCEPTOR(SIZE_T, iconv, void *cd, char **inbuf, SIZE_T *inbytesleft,
char **outbuf, SIZE_T *outbytesleft) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, iconv, cd, inbuf, inbytesleft, outbuf,
outbytesleft);
if (inbytesleft)
COMMON_INTERCEPTOR_READ_RANGE(ctx, inbytesleft, sizeof(*inbytesleft));
if (inbuf && inbytesleft)
COMMON_INTERCEPTOR_READ_RANGE(ctx, *inbuf, *inbytesleft);
if (outbytesleft)
COMMON_INTERCEPTOR_READ_RANGE(ctx, outbytesleft, sizeof(*outbytesleft));
void *outbuf_orig = outbuf ? *outbuf : nullptr;
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SIZE_T res = REAL(iconv)(cd, inbuf, inbytesleft, outbuf, outbytesleft);
if (outbuf && *outbuf > outbuf_orig) {
SIZE_T sz = (char *)*outbuf - (char *)outbuf_orig;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, outbuf_orig, sz);
}
return res;
}
#define INIT_ICONV COMMON_INTERCEPT_FUNCTION(iconv);
#else
#define INIT_ICONV
#endif
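// The write range above is derived from how far iconv() advanced *outbuf.
// A hypothetical caller, for illustration:
//
//   #include <iconv.h>
//   size_t convert(iconv_t cd, char *in, size_t inlen,
//                  char *out, size_t outlen) {
//     char *inp = in, *outp = out;
//     iconv(cd, &inp, &inlen, &outp, &outlen);
//     // Exactly outp - out bytes of 'out' were unpoisoned, i.e. only what
//     // iconv() actually produced.
//     return (size_t)(outp - out);
//   }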
#if SANITIZER_INTERCEPT_TIMES
INTERCEPTOR(__sanitizer_clock_t, times, void *tms) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, times, tms);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_clock_t res = REAL(times)(tms);
if (res != (__sanitizer_clock_t)-1 && tms)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tms, struct_tms_sz);
return res;
}
#define INIT_TIMES COMMON_INTERCEPT_FUNCTION(times);
#else
#define INIT_TIMES
#endif
#if SANITIZER_S390 && \
(SANITIZER_INTERCEPT_TLS_GET_ADDR || SANITIZER_INTERCEPT_TLS_GET_OFFSET)
extern "C" uptr __tls_get_offset_wrapper(void *arg, uptr (*fn)(void *arg));
DEFINE_REAL(uptr, __tls_get_offset, void *arg)
#endif
#if SANITIZER_INTERCEPT_TLS_GET_ADDR
#if !SANITIZER_S390
#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_addr)
// If you see any crashes around this function, there are 2 known issues with
// it: 1. __tls_get_addr can be called with a misaligned stack due to:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
// 2. It can be called recursively if sanitizer code uses __tls_get_addr
// to access thread-local variables (this should not happen normally,
// because sanitizers use the initial-exec TLS model).
INTERCEPTOR(void *, __tls_get_addr, void *arg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr, arg);
void *res = REAL(__tls_get_addr)(arg);
uptr tls_begin, tls_end;
COMMON_INTERCEPTOR_GET_TLS_RANGE(&tls_begin, &tls_end);
DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, res, tls_begin, tls_end);
if (dtv) {
// New DTLS block has been allocated.
COMMON_INTERCEPTOR_INITIALIZE_RANGE((void *)dtv->beg, dtv->size);
}
return res;
}
#if SANITIZER_PPC
// On PowerPC, we also need to intercept __tls_get_addr_opt, which has
// mostly the same semantics as __tls_get_addr, but whose presence enables
// some optimizations in the linker (which are safe to ignore here).
extern "C" __attribute__((alias("__interceptor___tls_get_addr"),
visibility("default")))
void *__tls_get_addr_opt(void *arg);
#endif
#else // SANITIZER_S390
// On s390, we have to intercept two functions here:
// - __tls_get_addr_internal, which is a glibc-internal function that is like
// the usual __tls_get_addr, but returns a TP-relative offset instead of
// a proper pointer. It is used by dlsym for TLS symbols.
// - __tls_get_offset, which is like the above, but takes a GOT-relative
// descriptor offset as an argument instead of a pointer. The GOT address
// is passed in r12, so it's necessary to write this one in assembly. This
// is the function used by the compiler.
#define INIT_TLS_GET_ADDR COMMON_INTERCEPT_FUNCTION(__tls_get_offset)
INTERCEPTOR(uptr, __tls_get_addr_internal, void *arg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __tls_get_addr_internal, arg);
uptr res = __tls_get_offset_wrapper(arg, REAL(__tls_get_offset));
uptr tp = reinterpret_cast<uptr>(__builtin_thread_pointer());
void *ptr = reinterpret_cast<void *>(res + tp);
uptr tls_begin, tls_end;
COMMON_INTERCEPTOR_GET_TLS_RANGE(&tls_begin, &tls_end);
DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, ptr, tls_begin, tls_end);
if (dtv) {
// New DTLS block has been allocated.
COMMON_INTERCEPTOR_INITIALIZE_RANGE((void *)dtv->beg, dtv->size);
}
return res;
}
#endif // SANITIZER_S390
#else
#define INIT_TLS_GET_ADDR
#endif
#if SANITIZER_S390 && \
(SANITIZER_INTERCEPT_TLS_GET_ADDR || SANITIZER_INTERCEPT_TLS_GET_OFFSET)
extern "C" uptr __tls_get_offset(void *arg);
extern "C" uptr __interceptor___tls_get_offset(void *arg);
// We need a hidden symbol aliasing the above, so that we can jump
// directly to it from the assembly below.
extern "C" __attribute__((alias("__interceptor___tls_get_addr_internal"),
visibility("hidden")))
uptr __tls_get_addr_hidden(void *arg);
// Now carefully intercept __tls_get_offset.
asm(
".text\n"
// The __interceptor_ version has to exist, so that gen_dynamic_list.py
// exports our symbol.
".weak __tls_get_offset\n"
".type __tls_get_offset, @function\n"
"__tls_get_offset:\n"
".global __interceptor___tls_get_offset\n"
".type __interceptor___tls_get_offset, @function\n"
"__interceptor___tls_get_offset:\n"
#ifdef __s390x__
"la %r2, 0(%r2,%r12)\n"
"jg __tls_get_addr_hidden\n"
#else
"basr %r3,0\n"
"0: la %r2,0(%r2,%r12)\n"
"l %r4,1f-0b(%r3)\n"
"b 0(%r4,%r3)\n"
"1: .long __tls_get_addr_hidden - 0b\n"
#endif
".size __interceptor___tls_get_offset, .-__interceptor___tls_get_offset\n"
// Assembly wrapper to call REAL(__tls_get_offset)(arg)
".type __tls_get_offset_wrapper, @function\n"
"__tls_get_offset_wrapper:\n"
#ifdef __s390x__
"sgr %r2,%r12\n"
#else
"sr %r2,%r12\n"
#endif
"br %r3\n"
".size __tls_get_offset_wrapper, .-__tls_get_offset_wrapper\n"
);
#endif
#if SANITIZER_INTERCEPT_LISTXATTR
INTERCEPTOR(SSIZE_T, listxattr, const char *path, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, listxattr, path, list, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(listxattr)(path, list, size);
// Here and below, size == 0 is a special case where nothing is written to the
// buffer, and res contains the desired buffer size.
if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
return res;
}
INTERCEPTOR(SSIZE_T, llistxattr, const char *path, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, llistxattr, path, list, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(llistxattr)(path, list, size);
if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
return res;
}
INTERCEPTOR(SSIZE_T, flistxattr, int fd, char *list, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, flistxattr, fd, list, size);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(flistxattr)(fd, list, size);
if (size && res > 0 && list) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, list, res);
return res;
}
#define INIT_LISTXATTR \
COMMON_INTERCEPT_FUNCTION(listxattr); \
COMMON_INTERCEPT_FUNCTION(llistxattr); \
COMMON_INTERCEPT_FUNCTION(flistxattr);
#else
#define INIT_LISTXATTR
#endif
#if SANITIZER_INTERCEPT_GETXATTR
INTERCEPTOR(SSIZE_T, getxattr, const char *path, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getxattr, path, name, value, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(getxattr)(path, name, value, size);
if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
return res;
}
INTERCEPTOR(SSIZE_T, lgetxattr, const char *path, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lgetxattr, path, name, value, size);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(lgetxattr)(path, name, value, size);
if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
return res;
}
INTERCEPTOR(SSIZE_T, fgetxattr, int fd, const char *name, char *value,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetxattr, fd, name, value, size);
if (name) COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
SSIZE_T res = REAL(fgetxattr)(fd, name, value, size);
if (size && res > 0 && value) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, res);
return res;
}
#define INIT_GETXATTR \
COMMON_INTERCEPT_FUNCTION(getxattr); \
COMMON_INTERCEPT_FUNCTION(lgetxattr); \
COMMON_INTERCEPT_FUNCTION(fgetxattr);
#else
#define INIT_GETXATTR
#endif
#if SANITIZER_INTERCEPT_GETRESID
INTERCEPTOR(int, getresuid, void *ruid, void *euid, void *suid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getresuid, ruid, euid, suid);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getresuid)(ruid, euid, suid);
if (res >= 0) {
if (ruid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ruid, uid_t_sz);
if (euid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, euid, uid_t_sz);
if (suid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, suid, uid_t_sz);
}
return res;
}
INTERCEPTOR(int, getresgid, void *rgid, void *egid, void *sgid) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getresgid, rgid, egid, sgid);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getresgid)(rgid, egid, sgid);
if (res >= 0) {
if (rgid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rgid, gid_t_sz);
if (egid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, egid, gid_t_sz);
if (sgid) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sgid, gid_t_sz);
}
return res;
}
#define INIT_GETRESID \
COMMON_INTERCEPT_FUNCTION(getresuid); \
COMMON_INTERCEPT_FUNCTION(getresgid);
#else
#define INIT_GETRESID
#endif
#if SANITIZER_INTERCEPT_GETIFADDRS
// As long as getifaddrs()/freeifaddrs() use calloc()/free(), we don't need to
// intercept freeifaddrs(). If that ceases to be the case, we might need to
// intercept it to poison the memory again.
INTERCEPTOR(int, getifaddrs, __sanitizer_ifaddrs **ifap) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getifaddrs, ifap);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(getifaddrs)(ifap);
if (res == 0 && ifap) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifap, sizeof(void *));
__sanitizer_ifaddrs *p = *ifap;
while (p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(__sanitizer_ifaddrs));
if (p->ifa_name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_name,
internal_strlen(p->ifa_name) + 1);
if (p->ifa_addr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_addr, struct_sockaddr_sz);
if (p->ifa_netmask)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_netmask, struct_sockaddr_sz);
// On Linux this is a union, but the other member also points to a
// struct sockaddr, so the following is sufficient.
if (p->ifa_dstaddr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->ifa_dstaddr, struct_sockaddr_sz);
// FIXME(smatveev): Unpoison p->ifa_data as well.
p = p->ifa_next;
}
}
return res;
}
#define INIT_GETIFADDRS \
COMMON_INTERCEPT_FUNCTION(getifaddrs);
#else
#define INIT_GETIFADDRS
#endif
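// The loop above mirrors the canonical traversal of the getifaddrs() list.
// A hypothetical caller, for illustration:
//
//   #include <ifaddrs.h>
//   #include <stdio.h>
//   void list_interfaces(void) {
//     struct ifaddrs *ifap;
//     if (getifaddrs(&ifap) == 0) {
//       for (struct ifaddrs *p = ifap; p; p = p->ifa_next)
//         if (p->ifa_name) puts(p->ifa_name);  // unpoisoned above
//       freeifaddrs(ifap);
//     }
//   }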
#if SANITIZER_INTERCEPT_IF_INDEXTONAME
INTERCEPTOR(char *, if_indextoname, unsigned int ifindex, char* ifname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, if_indextoname, ifindex, ifname);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
char *res = REAL(if_indextoname)(ifindex, ifname);
if (res && ifname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ifname, internal_strlen(ifname) + 1);
return res;
}
INTERCEPTOR(unsigned int, if_nametoindex, const char* ifname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, if_nametoindex, ifname);
if (ifname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ifname, internal_strlen(ifname) + 1);
return REAL(if_nametoindex)(ifname);
}
#define INIT_IF_INDEXTONAME \
COMMON_INTERCEPT_FUNCTION(if_indextoname); \
COMMON_INTERCEPT_FUNCTION(if_nametoindex);
#else
#define INIT_IF_INDEXTONAME
#endif
#if SANITIZER_INTERCEPT_CAPGET
INTERCEPTOR(int, capget, void *hdrp, void *datap) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, capget, hdrp, datap);
if (hdrp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hdrp, __user_cap_header_struct_sz);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(capget)(hdrp, datap);
if (res == 0 && datap)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datap, __user_cap_data_struct_sz);
// capget() can also return -1 and write to hdrp->version if the version
// passed in hdrp->version is unsupported. But that's not a trivial condition
// to check, and anyway COMMON_INTERCEPTOR_READ_RANGE protects us to some
// extent.
return res;
}
INTERCEPTOR(int, capset, void *hdrp, const void *datap) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, capset, hdrp, datap);
if (hdrp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, hdrp, __user_cap_header_struct_sz);
if (datap)
COMMON_INTERCEPTOR_READ_RANGE(ctx, datap, __user_cap_data_struct_sz);
return REAL(capset)(hdrp, datap);
}
#define INIT_CAPGET \
COMMON_INTERCEPT_FUNCTION(capget); \
COMMON_INTERCEPT_FUNCTION(capset);
#else
#define INIT_CAPGET
#endif
#if SANITIZER_INTERCEPT_AEABI_MEM
INTERCEPTOR(void *, __aeabi_memmove, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memmove4, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memmove8, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMMOVE_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memcpy, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memcpy4, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
}
INTERCEPTOR(void *, __aeabi_memcpy8, void *to, const void *from, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMCPY_IMPL(ctx, to, from, size);
}
// Note the argument order.
INTERCEPTOR(void *, __aeabi_memset, void *block, uptr size, int c) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
}
INTERCEPTOR(void *, __aeabi_memset4, void *block, uptr size, int c) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
}
INTERCEPTOR(void *, __aeabi_memset8, void *block, uptr size, int c) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, c, size);
}
INTERCEPTOR(void *, __aeabi_memclr, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
INTERCEPTOR(void *, __aeabi_memclr4, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
INTERCEPTOR(void *, __aeabi_memclr8, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
#define INIT_AEABI_MEM \
COMMON_INTERCEPT_FUNCTION(__aeabi_memmove); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memmove4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memmove8); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memcpy); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memcpy4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memcpy8); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memset); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memset4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memset8); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memclr); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memclr4); \
COMMON_INTERCEPT_FUNCTION(__aeabi_memclr8);
#else
#define INIT_AEABI_MEM
#endif // SANITIZER_INTERCEPT_AEABI_MEM
#if SANITIZER_INTERCEPT___BZERO
INTERCEPTOR(void *, __bzero, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
#define INIT___BZERO COMMON_INTERCEPT_FUNCTION(__bzero);
#else
#define INIT___BZERO
#endif // SANITIZER_INTERCEPT___BZERO
#if SANITIZER_INTERCEPT_BZERO
INTERCEPTOR(void *, bzero, void *block, uptr size) {
void *ctx;
COMMON_INTERCEPTOR_MEMSET_IMPL(ctx, block, 0, size);
}
#define INIT_BZERO COMMON_INTERCEPT_FUNCTION(bzero);
#else
#define INIT_BZERO
#endif // SANITIZER_INTERCEPT_BZERO
#if SANITIZER_INTERCEPT_FTIME
INTERCEPTOR(int, ftime, __sanitizer_timeb *tp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ftime, tp);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(ftime)(tp);
if (tp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, tp, sizeof(*tp));
return res;
}
#define INIT_FTIME COMMON_INTERCEPT_FUNCTION(ftime);
#else
#define INIT_FTIME
#endif // SANITIZER_INTERCEPT_FTIME
#if SANITIZER_INTERCEPT_XDR
INTERCEPTOR(void, xdrmem_create, __sanitizer_XDR *xdrs, uptr addr,
unsigned size, int op) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdrmem_create, xdrs, addr, size, op);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(xdrmem_create)(xdrs, addr, size, op);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdrs, sizeof(*xdrs));
if (op == __sanitizer_XDR_ENCODE) {
// It's not obvious how much data individual xdr_ routines write.
// Simply unpoison the entire target buffer in advance.
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, (void *)addr, size);
}
}
INTERCEPTOR(void, xdrstdio_create, __sanitizer_XDR *xdrs, void *file, int op) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdrstdio_create, xdrs, file, op);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
REAL(xdrstdio_create)(xdrs, file, op);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdrs, sizeof(*xdrs));
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
#define XDR_INTERCEPTOR(F, T) \
INTERCEPTOR(int, F, __sanitizer_XDR *xdrs, T *p) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, F, xdrs, p); \
if (p && xdrs->x_op == __sanitizer_XDR_ENCODE) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p)); \
int res = REAL(F)(xdrs, p); \
if (res && p && xdrs->x_op == __sanitizer_XDR_DECODE) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p)); \
return res; \
}
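// For illustration, XDR_INTERCEPTOR(xdr_int, int) expands to roughly the
// following (a mechanical expansion of the macro above):
//   INTERCEPTOR(int, xdr_int, __sanitizer_XDR *xdrs, int *p) {
//     void *ctx;
//     COMMON_INTERCEPTOR_ENTER(ctx, xdr_int, xdrs, p);
//     // Encoding reads the application's value...
//     if (p && xdrs->x_op == __sanitizer_XDR_ENCODE)
//       COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
//     int res = REAL(xdr_int)(xdrs, p);
//     // ...decoding writes (and thus initializes) it.
//     if (res && p && xdrs->x_op == __sanitizer_XDR_DECODE)
//       COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
//     return res;
//   }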
XDR_INTERCEPTOR(xdr_short, short)
XDR_INTERCEPTOR(xdr_u_short, unsigned short)
XDR_INTERCEPTOR(xdr_int, int)
XDR_INTERCEPTOR(xdr_u_int, unsigned)
XDR_INTERCEPTOR(xdr_long, long)
XDR_INTERCEPTOR(xdr_u_long, unsigned long)
XDR_INTERCEPTOR(xdr_hyper, long long)
XDR_INTERCEPTOR(xdr_u_hyper, unsigned long long)
XDR_INTERCEPTOR(xdr_longlong_t, long long)
XDR_INTERCEPTOR(xdr_u_longlong_t, unsigned long long)
XDR_INTERCEPTOR(xdr_int8_t, u8)
XDR_INTERCEPTOR(xdr_uint8_t, u8)
XDR_INTERCEPTOR(xdr_int16_t, u16)
XDR_INTERCEPTOR(xdr_uint16_t, u16)
XDR_INTERCEPTOR(xdr_int32_t, u32)
XDR_INTERCEPTOR(xdr_uint32_t, u32)
XDR_INTERCEPTOR(xdr_int64_t, u64)
XDR_INTERCEPTOR(xdr_uint64_t, u64)
XDR_INTERCEPTOR(xdr_quad_t, long long)
XDR_INTERCEPTOR(xdr_u_quad_t, unsigned long long)
XDR_INTERCEPTOR(xdr_bool, bool)
XDR_INTERCEPTOR(xdr_enum, int)
XDR_INTERCEPTOR(xdr_char, char)
XDR_INTERCEPTOR(xdr_u_char, unsigned char)
XDR_INTERCEPTOR(xdr_float, float)
XDR_INTERCEPTOR(xdr_double, double)
// FIXME: intercept xdr_array, opaque, union, vector, reference, pointer,
// wrapstring, sizeof
INTERCEPTOR(int, xdr_bytes, __sanitizer_XDR *xdrs, char **p, unsigned *sizep,
unsigned maxsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdr_bytes, xdrs, p, sizep, maxsize);
if (p && sizep && xdrs->x_op == __sanitizer_XDR_ENCODE) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_READ_RANGE(ctx, sizep, sizeof(*sizep));
COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, *sizep);
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(xdr_bytes)(xdrs, p, sizep, maxsize);
if (p && sizep && xdrs->x_op == __sanitizer_XDR_DECODE) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sizep, sizeof(*sizep));
if (res && *p && *sizep) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, *sizep);
}
return res;
}
INTERCEPTOR(int, xdr_string, __sanitizer_XDR *xdrs, char **p,
unsigned maxsize) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdr_string, xdrs, p, maxsize);
if (p && xdrs->x_op == __sanitizer_XDR_ENCODE) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_READ_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
int res = REAL(xdr_string)(xdrs, p, maxsize);
if (p && xdrs->x_op == __sanitizer_XDR_DECODE) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
if (res && *p)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *p, internal_strlen(*p) + 1);
}
return res;
}
#define INIT_XDR \
COMMON_INTERCEPT_FUNCTION(xdrmem_create); \
COMMON_INTERCEPT_FUNCTION(xdrstdio_create); \
COMMON_INTERCEPT_FUNCTION(xdr_short); \
COMMON_INTERCEPT_FUNCTION(xdr_u_short); \
COMMON_INTERCEPT_FUNCTION(xdr_int); \
COMMON_INTERCEPT_FUNCTION(xdr_u_int); \
COMMON_INTERCEPT_FUNCTION(xdr_long); \
COMMON_INTERCEPT_FUNCTION(xdr_u_long); \
COMMON_INTERCEPT_FUNCTION(xdr_hyper); \
COMMON_INTERCEPT_FUNCTION(xdr_u_hyper); \
COMMON_INTERCEPT_FUNCTION(xdr_longlong_t); \
COMMON_INTERCEPT_FUNCTION(xdr_u_longlong_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int8_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint8_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int16_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint16_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int32_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint32_t); \
COMMON_INTERCEPT_FUNCTION(xdr_int64_t); \
COMMON_INTERCEPT_FUNCTION(xdr_uint64_t); \
COMMON_INTERCEPT_FUNCTION(xdr_quad_t); \
COMMON_INTERCEPT_FUNCTION(xdr_u_quad_t); \
COMMON_INTERCEPT_FUNCTION(xdr_bool); \
COMMON_INTERCEPT_FUNCTION(xdr_enum); \
COMMON_INTERCEPT_FUNCTION(xdr_char); \
COMMON_INTERCEPT_FUNCTION(xdr_u_char); \
COMMON_INTERCEPT_FUNCTION(xdr_float); \
COMMON_INTERCEPT_FUNCTION(xdr_double); \
COMMON_INTERCEPT_FUNCTION(xdr_bytes); \
COMMON_INTERCEPT_FUNCTION(xdr_string);
#else
#define INIT_XDR
#endif // SANITIZER_INTERCEPT_XDR
#if SANITIZER_INTERCEPT_XDRREC
typedef int (*xdrrec_cb)(char*, char*, int);
struct XdrRecWrapper {
char *handle;
xdrrec_cb rd, wr;
};
typedef AddrHashMap<XdrRecWrapper *, 11> XdrRecWrapMap;
static XdrRecWrapMap *xdrrec_wrap_map;
static int xdrrec_wr_wrap(char *handle, char *buf, int count) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(buf, count);
XdrRecWrapper *wrap = (XdrRecWrapper *)handle;
return wrap->wr(wrap->handle, buf, count);
}
static int xdrrec_rd_wrap(char *handle, char *buf, int count) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
XdrRecWrapper *wrap = (XdrRecWrapper *)handle;
return wrap->rd(wrap->handle, buf, count);
}
// This doesn't apply to the Solaris version, which has a different function
// signature.
INTERCEPTOR(void, xdrrec_create, __sanitizer_XDR *xdr, unsigned sndsize,
unsigned rcvsize, char *handle, int (*rd)(char*, char*, int),
int (*wr)(char*, char*, int)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdrrec_create, xdr, sndsize, rcvsize,
handle, rd, wr);
COMMON_INTERCEPTOR_READ_RANGE(ctx, &xdr->x_op, sizeof xdr->x_op);
// We can't allocate a wrapper on the stack, as the handle is used outside
// this stack frame. So we put it on the heap and track it in a hash map
// keyed by x_private. When xdr_destroy is later called, we look the wrapper
// up in the map, free it, and remove the map entry.
XdrRecWrapper *wrap_data =
(XdrRecWrapper *)InternalAlloc(sizeof(XdrRecWrapper));
wrap_data->handle = handle;
wrap_data->rd = rd;
wrap_data->wr = wr;
if (wr)
wr = xdrrec_wr_wrap;
if (rd)
rd = xdrrec_rd_wrap;
handle = (char *)wrap_data;
REAL(xdrrec_create)(xdr, sndsize, rcvsize, handle, rd, wr);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, xdr, sizeof *xdr);
XdrRecWrapMap::Handle wrap(xdrrec_wrap_map, xdr->x_private, false, true);
*wrap = wrap_data;
}
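// Illustrative call flow: once xdrrec_create() returns, a read on the stream
// goes REAL(xdrrec) -> xdrrec_rd_wrap((char *)wrap_data, buf, count)
// -> wrap_data->rd(original handle, buf, count); the write path likewise goes
// through xdrrec_wr_wrap(), which unpoisons buf before the user's callback
// runs. xdr_destroy() below recovers wrap_data via xdr->x_private.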
// We intercept xdr_destroy only so the wrapper's memory can be freed;
// the interception is otherwise unnecessary.
INTERCEPTOR(void, xdr_destroy, __sanitizer_XDR *xdr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, xdr_destroy, xdr);
XdrRecWrapMap::Handle wrap(xdrrec_wrap_map, xdr->x_private, true);
InternalFree(*wrap);
REAL(xdr_destroy)(xdr);
}
#define INIT_XDRREC_LINUX \
static u64 xdrrec_wrap_mem[sizeof(XdrRecWrapMap) / sizeof(u64) + 1]; \
xdrrec_wrap_map = new ((void *)&xdrrec_wrap_mem) XdrRecWrapMap(); \
COMMON_INTERCEPT_FUNCTION(xdrrec_create); \
COMMON_INTERCEPT_FUNCTION(xdr_destroy);
#else
#define INIT_XDRREC_LINUX
#endif
#if SANITIZER_INTERCEPT_TSEARCH
INTERCEPTOR(void *, tsearch, void *key, void **rootp,
int (*compar)(const void *, const void *)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, tsearch, key, rootp, compar);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
void *res = REAL(tsearch)(key, rootp, compar);
if (res && *(void **)res == key)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, sizeof(void *));
return res;
}
#define INIT_TSEARCH COMMON_INTERCEPT_FUNCTION(tsearch);
#else
#define INIT_TSEARCH
#endif
#if SANITIZER_INTERCEPT_LIBIO_INTERNALS || SANITIZER_INTERCEPT_FOPEN || \
SANITIZER_INTERCEPT_OPEN_MEMSTREAM
void unpoison_file(__sanitizer_FILE *fp) {
#if SANITIZER_HAS_STRUCT_FILE
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp, sizeof(*fp));
#if SANITIZER_NETBSD
if (fp->_bf._base && fp->_bf._size > 0)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp->_bf._base,
fp->_bf._size);
#else
if (fp->_IO_read_base && fp->_IO_read_base < fp->_IO_read_end)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp->_IO_read_base,
fp->_IO_read_end - fp->_IO_read_base);
if (fp->_IO_write_base && fp->_IO_write_base < fp->_IO_write_end)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(fp->_IO_write_base,
fp->_IO_write_end - fp->_IO_write_base);
#endif
#endif // SANITIZER_HAS_STRUCT_FILE
}
#endif
#if SANITIZER_INTERCEPT_LIBIO_INTERNALS
// These functions are called when a .c source is built with -O2.
INTERCEPTOR(int, __uflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __uflow, fp);
int res = REAL(__uflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __underflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __underflow, fp);
int res = REAL(__underflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __overflow, __sanitizer_FILE *fp, int ch) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __overflow, fp, ch);
int res = REAL(__overflow)(fp, ch);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __wuflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __wuflow, fp);
int res = REAL(__wuflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __wunderflow, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __wunderflow, fp);
int res = REAL(__wunderflow)(fp);
unpoison_file(fp);
return res;
}
INTERCEPTOR(int, __woverflow, __sanitizer_FILE *fp, int ch) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __woverflow, fp, ch);
int res = REAL(__woverflow)(fp, ch);
unpoison_file(fp);
return res;
}
#define INIT_LIBIO_INTERNALS \
COMMON_INTERCEPT_FUNCTION(__uflow); \
COMMON_INTERCEPT_FUNCTION(__underflow); \
COMMON_INTERCEPT_FUNCTION(__overflow); \
COMMON_INTERCEPT_FUNCTION(__wuflow); \
COMMON_INTERCEPT_FUNCTION(__wunderflow); \
COMMON_INTERCEPT_FUNCTION(__woverflow);
#else
#define INIT_LIBIO_INTERNALS
#endif
#if SANITIZER_INTERCEPT_FOPEN
INTERCEPTOR(__sanitizer_FILE *, fopen, const char *path, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopen, path, mode);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fopen)(path, mode);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, fdopen, int fd, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdopen, fd, mode);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fdopen)(fd, mode);
if (res) unpoison_file(res);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, freopen, const char *path, const char *mode,
__sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, freopen, path, mode, fp);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
__sanitizer_FILE *res = REAL(freopen)(path, mode, fp);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
#define INIT_FOPEN \
COMMON_INTERCEPT_FUNCTION(fopen); \
COMMON_INTERCEPT_FUNCTION(fdopen); \
COMMON_INTERCEPT_FUNCTION(freopen);
#else
#define INIT_FOPEN
#endif
#if SANITIZER_INTERCEPT_FLOPEN
INTERCEPTOR(int, flopen, const char *path, int flags, ...) {
void *ctx;
va_list ap;
va_start(ap, flags);
u16 mode = static_cast<u16>(va_arg(ap, u32));
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, flopen, path, flags, mode);
if (path) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
}
return REAL(flopen)(path, flags, mode);
}
INTERCEPTOR(int, flopenat, int dirfd, const char *path, int flags, ...) {
void *ctx;
va_list ap;
va_start(ap, flags);
u16 mode = static_cast<u16>(va_arg(ap, u32));
va_end(ap);
COMMON_INTERCEPTOR_ENTER(ctx, flopenat, dirfd, path, flags, mode);
if (path) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
}
return REAL(flopenat)(dirfd, path, flags, mode);
}
#define INIT_FLOPEN \
COMMON_INTERCEPT_FUNCTION(flopen); \
COMMON_INTERCEPT_FUNCTION(flopenat);
#else
#define INIT_FLOPEN
#endif
#if SANITIZER_INTERCEPT_FOPEN64
INTERCEPTOR(__sanitizer_FILE *, fopen64, const char *path, const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopen64, path, mode);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
__sanitizer_FILE *res = REAL(fopen64)(path, mode);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, freopen64, const char *path, const char *mode,
__sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, freopen64, path, mode, fp);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, mode, internal_strlen(mode) + 1);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
__sanitizer_FILE *res = REAL(freopen64)(path, mode, fp);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, path);
if (res) unpoison_file(res);
return res;
}
#define INIT_FOPEN64 \
COMMON_INTERCEPT_FUNCTION(fopen64); \
COMMON_INTERCEPT_FUNCTION(freopen64);
#else
#define INIT_FOPEN64
#endif
#if SANITIZER_INTERCEPT_OPEN_MEMSTREAM
INTERCEPTOR(__sanitizer_FILE *, open_memstream, char **ptr, SIZE_T *sizeloc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, open_memstream, ptr, sizeloc);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_FILE *res = REAL(open_memstream)(ptr, sizeloc);
if (res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, sizeof(*ptr));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sizeloc, sizeof(*sizeloc));
unpoison_file(res);
FileMetadata file = {ptr, sizeloc};
SetInterceptorMetadata(res, file);
}
return res;
}
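// The saved (ptr, sizeloc) metadata is what lets the fflush()/fclose()
// interceptors further down unpoison the stream's current buffer: both look
// the FILE up with GetInterceptorMetadata() and initialize the range
// [*m->addr, *m->addr + *m->size) once the buffer and its size become
// visible to the program.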
INTERCEPTOR(__sanitizer_FILE *, open_wmemstream, wchar_t **ptr,
SIZE_T *sizeloc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, open_wmemstream, ptr, sizeloc);
__sanitizer_FILE *res = REAL(open_wmemstream)(ptr, sizeloc);
if (res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, sizeof(*ptr));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sizeloc, sizeof(*sizeloc));
unpoison_file(res);
FileMetadata file = {(char **)ptr, sizeloc};
SetInterceptorMetadata(res, file);
}
return res;
}
INTERCEPTOR(__sanitizer_FILE *, fmemopen, void *buf, SIZE_T size,
const char *mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fmemopen, buf, size, mode);
// FIXME: under ASan the call below may write to freed memory and corrupt
// its metadata. See
// https://github.com/google/sanitizers/issues/321.
__sanitizer_FILE *res = REAL(fmemopen)(buf, size, mode);
if (res) unpoison_file(res);
return res;
}
#define INIT_OPEN_MEMSTREAM \
COMMON_INTERCEPT_FUNCTION(open_memstream); \
COMMON_INTERCEPT_FUNCTION(open_wmemstream); \
COMMON_INTERCEPT_FUNCTION(fmemopen);
#else
#define INIT_OPEN_MEMSTREAM
#endif
#if SANITIZER_INTERCEPT_OBSTACK
static void initialize_obstack(__sanitizer_obstack *obstack) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(obstack, sizeof(*obstack));
if (obstack->chunk)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(obstack->chunk,
sizeof(*obstack->chunk));
}
INTERCEPTOR(int, _obstack_begin_1, __sanitizer_obstack *obstack, int sz,
int align, void *(*alloc_fn)(uptr arg, uptr sz),
void (*free_fn)(uptr arg, void *p)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _obstack_begin_1, obstack, sz, align, alloc_fn,
free_fn);
int res = REAL(_obstack_begin_1)(obstack, sz, align, alloc_fn, free_fn);
if (res) initialize_obstack(obstack);
return res;
}
INTERCEPTOR(int, _obstack_begin, __sanitizer_obstack *obstack, int sz,
int align, void *(*alloc_fn)(uptr sz), void (*free_fn)(void *p)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _obstack_begin, obstack, sz, align, alloc_fn,
free_fn);
int res = REAL(_obstack_begin)(obstack, sz, align, alloc_fn, free_fn);
if (res) initialize_obstack(obstack);
return res;
}
INTERCEPTOR(void, _obstack_newchunk, __sanitizer_obstack *obstack, int length) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, _obstack_newchunk, obstack, length);
REAL(_obstack_newchunk)(obstack, length);
if (obstack->chunk)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(
obstack->chunk, obstack->next_free - (char *)obstack->chunk);
}
#define INIT_OBSTACK \
COMMON_INTERCEPT_FUNCTION(_obstack_begin_1); \
COMMON_INTERCEPT_FUNCTION(_obstack_begin); \
COMMON_INTERCEPT_FUNCTION(_obstack_newchunk);
#else
#define INIT_OBSTACK
#endif
#if SANITIZER_INTERCEPT_FFLUSH
INTERCEPTOR(int, fflush, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fflush, fp);
if (fp)
unpoison_file(fp);
int res = REAL(fflush)(fp);
// FIXME: handle fp == NULL
if (fp) {
const FileMetadata *m = GetInterceptorMetadata(fp);
if (m) COMMON_INTERCEPTOR_INITIALIZE_RANGE(*m->addr, *m->size);
}
return res;
}
#define INIT_FFLUSH COMMON_INTERCEPT_FUNCTION(fflush);
#else
#define INIT_FFLUSH
#endif
#if SANITIZER_INTERCEPT_FCLOSE
INTERCEPTOR(int, fclose, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fclose, fp);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
const FileMetadata *m = GetInterceptorMetadata(fp);
if (fp)
unpoison_file(fp);
int res = REAL(fclose)(fp);
if (m) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*m->addr, *m->size);
DeleteInterceptorMetadata(fp);
}
return res;
}
#define INIT_FCLOSE COMMON_INTERCEPT_FUNCTION(fclose);
#else
#define INIT_FCLOSE
#endif
#if SANITIZER_INTERCEPT_DLOPEN_DLCLOSE
INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlopen, filename, flag);
if (filename) COMMON_INTERCEPTOR_READ_STRING(ctx, filename, 0);
void *res = COMMON_INTERCEPTOR_DLOPEN(filename, flag);
Symbolizer::GetOrInit()->InvalidateModuleList();
COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, res);
return res;
}
INTERCEPTOR(int, dlclose, void *handle) {
void *ctx;
COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, dlclose, handle);
int res = REAL(dlclose)(handle);
Symbolizer::GetOrInit()->InvalidateModuleList();
COMMON_INTERCEPTOR_LIBRARY_UNLOADED();
return res;
}
#define INIT_DLOPEN_DLCLOSE \
COMMON_INTERCEPT_FUNCTION(dlopen); \
COMMON_INTERCEPT_FUNCTION(dlclose);
#else
#define INIT_DLOPEN_DLCLOSE
#endif
#if SANITIZER_INTERCEPT_GETPASS
INTERCEPTOR(char *, getpass, const char *prompt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getpass, prompt);
if (prompt)
COMMON_INTERCEPTOR_READ_RANGE(ctx, prompt, internal_strlen(prompt)+1);
char *res = REAL(getpass)(prompt);
if (res) COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res)+1);
return res;
}
#define INIT_GETPASS COMMON_INTERCEPT_FUNCTION(getpass);
#else
#define INIT_GETPASS
#endif
#if SANITIZER_INTERCEPT_TIMERFD
INTERCEPTOR(int, timerfd_settime, int fd, int flags, void *new_value,
void *old_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, timerfd_settime, fd, flags, new_value,
old_value);
COMMON_INTERCEPTOR_READ_RANGE(ctx, new_value, struct_itimerspec_sz);
int res = REAL(timerfd_settime)(fd, flags, new_value, old_value);
if (res != -1 && old_value)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, old_value, struct_itimerspec_sz);
return res;
}
INTERCEPTOR(int, timerfd_gettime, int fd, void *curr_value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, timerfd_gettime, fd, curr_value);
int res = REAL(timerfd_gettime)(fd, curr_value);
if (res != -1 && curr_value)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, curr_value, struct_itimerspec_sz);
return res;
}
#define INIT_TIMERFD \
COMMON_INTERCEPT_FUNCTION(timerfd_settime); \
COMMON_INTERCEPT_FUNCTION(timerfd_gettime);
#else
#define INIT_TIMERFD
#endif
#if SANITIZER_INTERCEPT_MLOCKX
// The Linux kernel has a bug that can deadlock the kernel if a process
// maps terabytes of memory and then calls mlock().
static void MlockIsUnsupported() {
static atomic_uint8_t printed;
if (atomic_exchange(&printed, 1, memory_order_relaxed))
return;
VPrintf(1, "%s ignores mlock/mlockall/munlock/munlockall\n",
SanitizerToolName);
}
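// Print-once idiom: atomic_exchange() returns the previous value of
// `printed`, so only the first caller observes 0 and emits the message;
// every subsequent call returns early.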
INTERCEPTOR(int, mlock, const void *addr, uptr len) {
MlockIsUnsupported();
return 0;
}
INTERCEPTOR(int, munlock, const void *addr, uptr len) {
MlockIsUnsupported();
return 0;
}
INTERCEPTOR(int, mlockall, int flags) {
MlockIsUnsupported();
return 0;
}
INTERCEPTOR(int, munlockall, void) {
MlockIsUnsupported();
return 0;
}
#define INIT_MLOCKX \
COMMON_INTERCEPT_FUNCTION(mlock); \
COMMON_INTERCEPT_FUNCTION(munlock); \
COMMON_INTERCEPT_FUNCTION(mlockall); \
COMMON_INTERCEPT_FUNCTION(munlockall);
#else
#define INIT_MLOCKX
#endif // SANITIZER_INTERCEPT_MLOCKX
#if SANITIZER_INTERCEPT_FOPENCOOKIE
struct WrappedCookie {
void *real_cookie;
__sanitizer_cookie_io_functions_t real_io_funcs;
};
static uptr wrapped_read(void *cookie, char *buf, uptr size) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_read real_read = wrapped_cookie->real_io_funcs.read;
return real_read ? real_read(wrapped_cookie->real_cookie, buf, size) : 0;
}
static uptr wrapped_write(void *cookie, const char *buf, uptr size) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_write real_write = wrapped_cookie->real_io_funcs.write;
return real_write ? real_write(wrapped_cookie->real_cookie, buf, size) : size;
}
static int wrapped_seek(void *cookie, u64 *offset, int whence) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(offset, sizeof(*offset));
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_seek real_seek = wrapped_cookie->real_io_funcs.seek;
return real_seek ? real_seek(wrapped_cookie->real_cookie, offset, whence)
: -1;
}
static int wrapped_close(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedCookie *wrapped_cookie = (WrappedCookie *)cookie;
__sanitizer_cookie_io_close real_close = wrapped_cookie->real_io_funcs.close;
int res = real_close ? real_close(wrapped_cookie->real_cookie) : 0;
InternalFree(wrapped_cookie);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, fopencookie, void *cookie, const char *mode,
__sanitizer_cookie_io_functions_t io_funcs) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fopencookie, cookie, mode, io_funcs);
WrappedCookie *wrapped_cookie =
(WrappedCookie *)InternalAlloc(sizeof(WrappedCookie));
wrapped_cookie->real_cookie = cookie;
wrapped_cookie->real_io_funcs = io_funcs;
__sanitizer_FILE *res =
REAL(fopencookie)(wrapped_cookie, mode, {wrapped_read, wrapped_write,
wrapped_seek, wrapped_close});
return res;
}
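// Illustration (hypothetical application code; my_read, my_write, my_close
// and my_cookie are made-up names): with this interceptor in place, I/O on
// the returned stream reaches the user callbacks through the wrappers, which
// unpoison their arguments first:
//   __sanitizer_cookie_io_functions_t io = {my_read, my_write, 0, my_close};
//   FILE *f = fopencookie(my_cookie, "w", io);  // cookie gets wrapped here
//   fwrite(buf, 1, n, f);  // ... -> wrapped_write(wrapper, buf, n)
//                          //     -> my_write(my_cookie, buf, n)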
#define INIT_FOPENCOOKIE COMMON_INTERCEPT_FUNCTION(fopencookie);
#else
#define INIT_FOPENCOOKIE
#endif // SANITIZER_INTERCEPT_FOPENCOOKIE
#if SANITIZER_INTERCEPT_SEM
INTERCEPTOR(int, sem_init, __sanitizer_sem_t *s, int pshared, unsigned value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_init, s, pshared, value);
// Workaround a bug in glibc's "old" semaphore implementation by
// zero-initializing the sem_t contents. This has to be done here because
- // interceptors bind to the lowest symbols version by default, hitting the
+ // interceptors bind to the lowest version before glibc 2.36, hitting the
// buggy code path while the non-sanitized build of the same code works fine.
REAL(memset)(s, 0, sizeof(*s));
int res = REAL(sem_init)(s, pshared, value);
return res;
}
INTERCEPTOR(int, sem_destroy, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_destroy, s);
int res = REAL(sem_destroy)(s);
return res;
}
INTERCEPTOR(int, sem_wait, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_wait, s);
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sem_wait)(s);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
}
return res;
}
INTERCEPTOR(int, sem_trywait, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_trywait, s);
int res = REAL(sem_trywait)(s);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
}
return res;
}
INTERCEPTOR(int, sem_timedwait, __sanitizer_sem_t *s, void *abstime) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_timedwait, s, abstime);
COMMON_INTERCEPTOR_READ_RANGE(ctx, abstime, struct_timespec_sz);
int res = COMMON_INTERCEPTOR_BLOCK_REAL(sem_timedwait)(s, abstime);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
}
return res;
}
INTERCEPTOR(int, sem_post, __sanitizer_sem_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_post, s);
COMMON_INTERCEPTOR_RELEASE(ctx, (uptr)s);
int res = REAL(sem_post)(s);
return res;
}
INTERCEPTOR(int, sem_getvalue, __sanitizer_sem_t *s, int *sval) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_getvalue, s, sval);
int res = REAL(sem_getvalue)(s, sval);
if (res == 0) {
COMMON_INTERCEPTOR_ACQUIRE(ctx, (uptr)s);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sval, sizeof(*sval));
}
return res;
}
INTERCEPTOR(__sanitizer_sem_t *, sem_open, const char *name, int oflag, ...) {
void *ctx;
va_list ap;
va_start(ap, oflag);
u32 mode = va_arg(ap, u32);
u32 value = va_arg(ap, u32);
COMMON_INTERCEPTOR_ENTER(ctx, sem_open, name, oflag, mode, value);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
__sanitizer_sem_t *s = REAL(sem_open)(name, oflag, mode, value);
if (s)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(s, sizeof(*s));
va_end(ap);
return s;
}
INTERCEPTOR(int, sem_unlink, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sem_unlink, name);
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
return REAL(sem_unlink)(name);
}
# define INIT_SEM \
COMMON_INTERCEPT_FUNCTION(sem_init); \
COMMON_INTERCEPT_FUNCTION(sem_destroy); \
COMMON_INTERCEPT_FUNCTION(sem_wait); \
COMMON_INTERCEPT_FUNCTION(sem_trywait); \
COMMON_INTERCEPT_FUNCTION(sem_timedwait); \
COMMON_INTERCEPT_FUNCTION(sem_post); \
COMMON_INTERCEPT_FUNCTION(sem_getvalue); \
COMMON_INTERCEPT_FUNCTION(sem_open); \
COMMON_INTERCEPT_FUNCTION(sem_unlink);
#else
# define INIT_SEM
#endif // SANITIZER_INTERCEPT_SEM
#if SANITIZER_INTERCEPT_PTHREAD_SETCANCEL
INTERCEPTOR(int, pthread_setcancelstate, int state, int *oldstate) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setcancelstate, state, oldstate);
int res = REAL(pthread_setcancelstate)(state, oldstate);
if (res == 0 && oldstate != nullptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldstate, sizeof(*oldstate));
return res;
}
INTERCEPTOR(int, pthread_setcanceltype, int type, int *oldtype) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pthread_setcanceltype, type, oldtype);
int res = REAL(pthread_setcanceltype)(type, oldtype);
if (res == 0 && oldtype != nullptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldtype, sizeof(*oldtype));
return res;
}
#define INIT_PTHREAD_SETCANCEL \
COMMON_INTERCEPT_FUNCTION(pthread_setcancelstate); \
COMMON_INTERCEPT_FUNCTION(pthread_setcanceltype);
#else
#define INIT_PTHREAD_SETCANCEL
#endif
#if SANITIZER_INTERCEPT_MINCORE
INTERCEPTOR(int, mincore, void *addr, uptr length, unsigned char *vec) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mincore, addr, length, vec);
int res = REAL(mincore)(addr, length, vec);
if (res == 0) {
uptr page_size = GetPageSizeCached();
uptr vec_size = ((length + page_size - 1) & (~(page_size - 1))) / page_size;
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, vec, vec_size);
}
return res;
}
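// The vec_size expression rounds length up to a whole number of pages before
// dividing, since mincore() writes one byte per page covered. For example,
// with page_size == 4096 and length == 8193, three pages are covered, so
// vec_size == 3 and vec[0..2] are unpoisoned.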
#define INIT_MINCORE COMMON_INTERCEPT_FUNCTION(mincore);
#else
#define INIT_MINCORE
#endif
#if SANITIZER_INTERCEPT_PROCESS_VM_READV
INTERCEPTOR(SSIZE_T, process_vm_readv, int pid, __sanitizer_iovec *local_iov,
uptr liovcnt, __sanitizer_iovec *remote_iov, uptr riovcnt,
uptr flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, process_vm_readv, pid, local_iov, liovcnt,
remote_iov, riovcnt, flags);
SSIZE_T res = REAL(process_vm_readv)(pid, local_iov, liovcnt, remote_iov,
riovcnt, flags);
if (res > 0)
write_iovec(ctx, local_iov, liovcnt, res);
return res;
}
INTERCEPTOR(SSIZE_T, process_vm_writev, int pid, __sanitizer_iovec *local_iov,
uptr liovcnt, __sanitizer_iovec *remote_iov, uptr riovcnt,
uptr flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, process_vm_writev, pid, local_iov, liovcnt,
remote_iov, riovcnt, flags);
SSIZE_T res = REAL(process_vm_writev)(pid, local_iov, liovcnt, remote_iov,
riovcnt, flags);
if (res > 0)
read_iovec(ctx, local_iov, liovcnt, res);
return res;
}
#define INIT_PROCESS_VM_READV \
COMMON_INTERCEPT_FUNCTION(process_vm_readv); \
COMMON_INTERCEPT_FUNCTION(process_vm_writev);
#else
#define INIT_PROCESS_VM_READV
#endif
#if SANITIZER_INTERCEPT_CTERMID
INTERCEPTOR(char *, ctermid, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctermid, s);
char *res = REAL(ctermid)(s);
if (res) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_CTERMID COMMON_INTERCEPT_FUNCTION(ctermid);
#else
#define INIT_CTERMID
#endif
#if SANITIZER_INTERCEPT_CTERMID_R
INTERCEPTOR(char *, ctermid_r, char *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ctermid_r, s);
char *res = REAL(ctermid_r)(s);
if (res) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_CTERMID_R COMMON_INTERCEPT_FUNCTION(ctermid_r);
#else
#define INIT_CTERMID_R
#endif
#if SANITIZER_INTERCEPT_RECV_RECVFROM
INTERCEPTOR(SSIZE_T, recv, int fd, void *buf, SIZE_T len, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recv, fd, buf, len, flags);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SSIZE_T res = REAL(recv)(fd, buf, len, flags);
if (res > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
}
if (res >= 0 && fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
return res;
}
INTERCEPTOR(SSIZE_T, recvfrom, int fd, void *buf, SIZE_T len, int flags,
void *srcaddr, int *addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, recvfrom, fd, buf, len, flags, srcaddr,
addrlen);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
SIZE_T srcaddr_sz;
if (srcaddr) srcaddr_sz = *addrlen;
(void)srcaddr_sz; // prevent "set but not used" warning
SSIZE_T res = REAL(recvfrom)(fd, buf, len, flags, srcaddr, addrlen);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, Min((SIZE_T)res, len));
if (res >= 0 && srcaddr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(srcaddr,
Min((SIZE_T)*addrlen, srcaddr_sz));
return res;
}
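// Note that *addrlen is an in/out parameter: srcaddr_sz snapshots the
// caller-provided capacity before the call, while the kernel's updated
// *addrlen may be larger if the source address was truncated, hence
// unpoisoning only the Min() of the two.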
#define INIT_RECV_RECVFROM \
COMMON_INTERCEPT_FUNCTION(recv); \
COMMON_INTERCEPT_FUNCTION(recvfrom);
#else
#define INIT_RECV_RECVFROM
#endif
#if SANITIZER_INTERCEPT_SEND_SENDTO
INTERCEPTOR(SSIZE_T, send, int fd, void *buf, SIZE_T len, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, send, fd, buf, len, flags);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
SSIZE_T res = REAL(send)(fd, buf, len, flags);
if (common_flags()->intercept_send && res > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
return res;
}
INTERCEPTOR(SSIZE_T, sendto, int fd, void *buf, SIZE_T len, int flags,
void *dstaddr, int addrlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sendto, fd, buf, len, flags, dstaddr, addrlen);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
// Can't check dstaddr as it may have uninitialized padding at the end.
SSIZE_T res = REAL(sendto)(fd, buf, len, flags, dstaddr, addrlen);
if (common_flags()->intercept_send && res > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buf, Min((SIZE_T)res, len));
return res;
}
#define INIT_SEND_SENDTO \
COMMON_INTERCEPT_FUNCTION(send); \
COMMON_INTERCEPT_FUNCTION(sendto);
#else
#define INIT_SEND_SENDTO
#endif
#if SANITIZER_INTERCEPT_EVENTFD_READ_WRITE
INTERCEPTOR(int, eventfd_read, int fd, u64 *value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, eventfd_read, fd, value);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
int res = REAL(eventfd_read)(fd, value);
if (res == 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, value, sizeof(*value));
if (fd >= 0) COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return res;
}
INTERCEPTOR(int, eventfd_write, int fd, u64 value) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, eventfd_write, fd, value);
if (fd >= 0) {
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd);
}
int res = REAL(eventfd_write)(fd, value);
return res;
}
#define INIT_EVENTFD_READ_WRITE \
COMMON_INTERCEPT_FUNCTION(eventfd_read); \
COMMON_INTERCEPT_FUNCTION(eventfd_write)
#else
#define INIT_EVENTFD_READ_WRITE
#endif
#if SANITIZER_INTERCEPT_STAT
INTERCEPTOR(int, stat, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stat, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(stat)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT_STAT COMMON_INTERCEPT_FUNCTION(stat)
#else
#define INIT_STAT
#endif
#if SANITIZER_INTERCEPT_STAT64
INTERCEPTOR(int, stat64, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stat64, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(stat64)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT_STAT64 COMMON_INTERCEPT_FUNCTION(stat64)
#else
#define INIT_STAT64
#endif
#if SANITIZER_INTERCEPT_LSTAT
INTERCEPTOR(int, lstat, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lstat, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(lstat)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT_LSTAT COMMON_INTERCEPT_FUNCTION(lstat)
#else
#define INIT_LSTAT
#endif
#if SANITIZER_INTERCEPT_STAT64
INTERCEPTOR(int, lstat64, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, lstat64, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(lstat64)(path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT_LSTAT64 COMMON_INTERCEPT_FUNCTION(lstat64)
#else
#define INIT_LSTAT64
#endif
#if SANITIZER_INTERCEPT___XSTAT
INTERCEPTOR(int, __xstat, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xstat, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__xstat)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT___XSTAT COMMON_INTERCEPT_FUNCTION(__xstat)
#else
#define INIT___XSTAT
#endif
#if SANITIZER_INTERCEPT___XSTAT64
INTERCEPTOR(int, __xstat64, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xstat64, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__xstat64)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT___XSTAT64 COMMON_INTERCEPT_FUNCTION(__xstat64)
#else
#define INIT___XSTAT64
#endif
#if SANITIZER_INTERCEPT___LXSTAT
INTERCEPTOR(int, __lxstat, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __lxstat, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__lxstat)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat_sz);
return res;
}
#define INIT___LXSTAT COMMON_INTERCEPT_FUNCTION(__lxstat)
#else
#define INIT___LXSTAT
#endif
#if SANITIZER_INTERCEPT___LXSTAT64
INTERCEPTOR(int, __lxstat64, int version, const char *path, void *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __lxstat64, version, path, buf);
if (common_flags()->intercept_stat)
COMMON_INTERCEPTOR_READ_STRING(ctx, path, 0);
int res = REAL(__lxstat64)(version, path, buf);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer::struct_stat64_sz);
return res;
}
#define INIT___LXSTAT64 COMMON_INTERCEPT_FUNCTION(__lxstat64)
#else
#define INIT___LXSTAT64
#endif
// FIXME: add other *stat interceptors
#if SANITIZER_INTERCEPT_UTMP
INTERCEPTOR(void *, getutent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutent, dummy);
void *res = REAL(getutent)(dummy);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
return res;
}
INTERCEPTOR(void *, getutid, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutid, ut);
void *res = REAL(getutid)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
return res;
}
INTERCEPTOR(void *, getutline, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutline, ut);
void *res = REAL(getutline)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmp_sz);
return res;
}
#define INIT_UTMP \
COMMON_INTERCEPT_FUNCTION(getutent); \
COMMON_INTERCEPT_FUNCTION(getutid); \
COMMON_INTERCEPT_FUNCTION(getutline);
#else
#define INIT_UTMP
#endif
#if SANITIZER_INTERCEPT_UTMPX
INTERCEPTOR(void *, getutxent, int dummy) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutxent, dummy);
void *res = REAL(getutxent)(dummy);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
return res;
}
INTERCEPTOR(void *, getutxid, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutxid, ut);
void *res = REAL(getutxid)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
return res;
}
INTERCEPTOR(void *, getutxline, void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getutxline, ut);
void *res = REAL(getutxline)(ut);
if (res)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, __sanitizer::struct_utmpx_sz);
return res;
}
INTERCEPTOR(void *, pututxline, const void *ut) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pututxline, ut);
if (ut)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ut, __sanitizer::struct_utmpx_sz);
void *res = REAL(pututxline)(ut);
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, __sanitizer::struct_utmpx_sz);
return res;
}
#define INIT_UTMPX \
COMMON_INTERCEPT_FUNCTION(getutxent); \
COMMON_INTERCEPT_FUNCTION(getutxid); \
COMMON_INTERCEPT_FUNCTION(getutxline); \
COMMON_INTERCEPT_FUNCTION(pututxline);
#else
#define INIT_UTMPX
#endif
#if SANITIZER_INTERCEPT_GETLOADAVG
INTERCEPTOR(int, getloadavg, double *loadavg, int nelem) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getloadavg, loadavg, nelem);
int res = REAL(getloadavg)(loadavg, nelem);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, loadavg, res * sizeof(*loadavg));
return res;
}
#define INIT_GETLOADAVG \
COMMON_INTERCEPT_FUNCTION(getloadavg);
#else
#define INIT_GETLOADAVG
#endif
#if SANITIZER_INTERCEPT_MCHECK_MPROBE
INTERCEPTOR(int, mcheck, void (*abortfunc)(int mstatus)) {
return 0;
}
INTERCEPTOR(int, mcheck_pedantic, void (*abortfunc)(int mstatus)) {
return 0;
}
INTERCEPTOR(int, mprobe, void *ptr) {
return 0;
}
#endif
INTERCEPTOR(SIZE_T, wcslen, const wchar_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcslen, s);
SIZE_T res = REAL(wcslen)(s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * (res + 1));
return res;
}
INTERCEPTOR(SIZE_T, wcsnlen, const wchar_t *s, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsnlen, s, n);
SIZE_T res = REAL(wcsnlen)(s, n);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * Min(res + 1, n));
return res;
}
#define INIT_WCSLEN \
COMMON_INTERCEPT_FUNCTION(wcslen); \
COMMON_INTERCEPT_FUNCTION(wcsnlen);
#if SANITIZER_INTERCEPT_WCSCAT
INTERCEPTOR(wchar_t *, wcscat, wchar_t *dst, const wchar_t *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcscat, dst, src);
SIZE_T src_size = internal_wcslen(src);
SIZE_T dst_size = internal_wcslen(dst);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, (src_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
(src_size + 1) * sizeof(wchar_t));
return REAL(wcscat)(dst, src);
}
INTERCEPTOR(wchar_t *, wcsncat, wchar_t *dst, const wchar_t *src, SIZE_T n) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsncat, dst, src, n);
SIZE_T src_size = internal_wcsnlen(src, n);
SIZE_T dst_size = internal_wcslen(dst);
COMMON_INTERCEPTOR_READ_RANGE(ctx, src,
Min(src_size + 1, n) * sizeof(wchar_t));
COMMON_INTERCEPTOR_READ_RANGE(ctx, dst, (dst_size + 1) * sizeof(wchar_t));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst + dst_size,
(src_size + 1) * sizeof(wchar_t));
return REAL(wcsncat)(dst, src, n);
}
#define INIT_WCSCAT \
COMMON_INTERCEPT_FUNCTION(wcscat); \
COMMON_INTERCEPT_FUNCTION(wcsncat);
#else
#define INIT_WCSCAT
#endif
#if SANITIZER_INTERCEPT_WCSDUP
INTERCEPTOR(wchar_t *, wcsdup, wchar_t *s) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, wcsdup, s);
SIZE_T len = internal_wcslen(s);
COMMON_INTERCEPTOR_READ_RANGE(ctx, s, sizeof(wchar_t) * (len + 1));
wchar_t *result = REAL(wcsdup)(s);
if (result)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof(wchar_t) * (len + 1));
return result;
}
#define INIT_WCSDUP COMMON_INTERCEPT_FUNCTION(wcsdup);
#else
#define INIT_WCSDUP
#endif
#if SANITIZER_INTERCEPT_STRXFRM
static SIZE_T RealStrLen(const char *str) { return internal_strlen(str); }
static SIZE_T RealStrLen(const wchar_t *str) { return internal_wcslen(str); }
#define STRXFRM_INTERCEPTOR_IMPL(strxfrm, dest, src, len, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, strxfrm, dest, src, len, ##__VA_ARGS__); \
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, \
sizeof(*src) * (RealStrLen(src) + 1)); \
SIZE_T res = REAL(strxfrm)(dest, src, len, ##__VA_ARGS__); \
if (res < len) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dest, sizeof(*src) * (res + 1)); \
return res; \
}
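// strxfrm() returns the length the transformed string requires; dest is only
// guaranteed to contain the full result (plus the terminating NUL) when that
// length is strictly less than len, hence the `res < len` guard above.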
INTERCEPTOR(SIZE_T, strxfrm, char *dest, const char *src, SIZE_T len) {
STRXFRM_INTERCEPTOR_IMPL(strxfrm, dest, src, len);
}
INTERCEPTOR(SIZE_T, strxfrm_l, char *dest, const char *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(strxfrm_l, dest, src, len, locale);
}
#define INIT_STRXFRM \
COMMON_INTERCEPT_FUNCTION(strxfrm); \
COMMON_INTERCEPT_FUNCTION(strxfrm_l);
#else
#define INIT_STRXFRM
#endif
#if SANITIZER_INTERCEPT___STRXFRM_L
INTERCEPTOR(SIZE_T, __strxfrm_l, char *dest, const char *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(__strxfrm_l, dest, src, len, locale);
}
#define INIT___STRXFRM_L COMMON_INTERCEPT_FUNCTION(__strxfrm_l);
#else
#define INIT___STRXFRM_L
#endif
#if SANITIZER_INTERCEPT_WCSXFRM
INTERCEPTOR(SIZE_T, wcsxfrm, wchar_t *dest, const wchar_t *src, SIZE_T len) {
STRXFRM_INTERCEPTOR_IMPL(wcsxfrm, dest, src, len);
}
INTERCEPTOR(SIZE_T, wcsxfrm_l, wchar_t *dest, const wchar_t *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(wcsxfrm_l, dest, src, len, locale);
}
#define INIT_WCSXFRM \
COMMON_INTERCEPT_FUNCTION(wcsxfrm); \
COMMON_INTERCEPT_FUNCTION(wcsxfrm_l);
#else
#define INIT_WCSXFRM
#endif
#if SANITIZER_INTERCEPT___WCSXFRM_L
INTERCEPTOR(SIZE_T, __wcsxfrm_l, wchar_t *dest, const wchar_t *src, SIZE_T len,
void *locale) {
STRXFRM_INTERCEPTOR_IMPL(__wcsxfrm_l, dest, src, len, locale);
}
#define INIT___WCSXFRM_L COMMON_INTERCEPT_FUNCTION(__wcsxfrm_l);
#else
#define INIT___WCSXFRM_L
#endif
#if SANITIZER_INTERCEPT_ACCT
INTERCEPTOR(int, acct, const char *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, acct, file);
if (file)
COMMON_INTERCEPTOR_READ_RANGE(ctx, file, internal_strlen(file) + 1);
return REAL(acct)(file);
}
#define INIT_ACCT COMMON_INTERCEPT_FUNCTION(acct)
#else
#define INIT_ACCT
#endif
#if SANITIZER_INTERCEPT_USER_FROM_UID
INTERCEPTOR(const char *, user_from_uid, u32 uid, int nouser) {
void *ctx;
const char *user;
COMMON_INTERCEPTOR_ENTER(ctx, user_from_uid, uid, nouser);
user = REAL(user_from_uid)(uid, nouser);
if (user)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, user, internal_strlen(user) + 1);
return user;
}
#define INIT_USER_FROM_UID COMMON_INTERCEPT_FUNCTION(user_from_uid)
#else
#define INIT_USER_FROM_UID
#endif
#if SANITIZER_INTERCEPT_UID_FROM_USER
INTERCEPTOR(int, uid_from_user, const char *name, u32 *uid) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, uid_from_user, name, uid);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
res = REAL(uid_from_user)(name, uid);
if (uid)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, uid, sizeof(*uid));
return res;
}
#define INIT_UID_FROM_USER COMMON_INTERCEPT_FUNCTION(uid_from_user)
#else
#define INIT_UID_FROM_USER
#endif
#if SANITIZER_INTERCEPT_GROUP_FROM_GID
INTERCEPTOR(const char *, group_from_gid, u32 gid, int nogroup) {
void *ctx;
const char *group;
COMMON_INTERCEPTOR_ENTER(ctx, group_from_gid, gid, nogroup);
group = REAL(group_from_gid)(gid, nogroup);
if (group)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, group, internal_strlen(group) + 1);
return group;
}
#define INIT_GROUP_FROM_GID COMMON_INTERCEPT_FUNCTION(group_from_gid)
#else
#define INIT_GROUP_FROM_GID
#endif
#if SANITIZER_INTERCEPT_GID_FROM_GROUP
INTERCEPTOR(int, gid_from_group, const char *group, u32 *gid) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, gid_from_group, group, gid);
if (group)
COMMON_INTERCEPTOR_READ_RANGE(ctx, group, internal_strlen(group) + 1);
res = REAL(gid_from_group)(group, gid);
if (gid)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, gid, sizeof(*gid));
return res;
}
#define INIT_GID_FROM_GROUP COMMON_INTERCEPT_FUNCTION(gid_from_group)
#else
#define INIT_GID_FROM_GROUP
#endif
#if SANITIZER_INTERCEPT_ACCESS
INTERCEPTOR(int, access, const char *path, int mode) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, access, path, mode);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(access)(path, mode);
}
#define INIT_ACCESS COMMON_INTERCEPT_FUNCTION(access)
#else
#define INIT_ACCESS
#endif
#if SANITIZER_INTERCEPT_FACCESSAT
INTERCEPTOR(int, faccessat, int fd, const char *path, int mode, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, faccessat, fd, path, mode, flags);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(faccessat)(fd, path, mode, flags);
}
#define INIT_FACCESSAT COMMON_INTERCEPT_FUNCTION(faccessat)
#else
#define INIT_FACCESSAT
#endif
#if SANITIZER_INTERCEPT_GETGROUPLIST
INTERCEPTOR(int, getgrouplist, const char *name, u32 basegid, u32 *groups,
int *ngroups) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, getgrouplist, name, basegid, groups, ngroups);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
if (ngroups)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ngroups, sizeof(*ngroups));
res = REAL(getgrouplist)(name, basegid, groups, ngroups);
if (!res && groups && ngroups) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, groups, sizeof(*groups) * (*ngroups));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ngroups, sizeof(*ngroups));
}
return res;
}
#define INIT_GETGROUPLIST COMMON_INTERCEPT_FUNCTION(getgrouplist);
#else
#define INIT_GETGROUPLIST
#endif
#if SANITIZER_INTERCEPT_GETGROUPMEMBERSHIP
INTERCEPTOR(int, getgroupmembership, const char *name, u32 basegid, u32 *groups,
int maxgrp, int *ngroups) {
void *ctx;
int res;
COMMON_INTERCEPTOR_ENTER(ctx, getgroupmembership, name, basegid, groups,
maxgrp, ngroups);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
res = REAL(getgroupmembership)(name, basegid, groups, maxgrp, ngroups);
if (!res && groups && ngroups) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, groups, sizeof(*groups) * (*ngroups));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ngroups, sizeof(*ngroups));
}
return res;
}
#define INIT_GETGROUPMEMBERSHIP COMMON_INTERCEPT_FUNCTION(getgroupmembership);
#else
#define INIT_GETGROUPMEMBERSHIP
#endif
#if SANITIZER_INTERCEPT_READLINK
INTERCEPTOR(SSIZE_T, readlink, const char *path, char *buf, SIZE_T bufsiz) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readlink, path, buf, bufsiz);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
SSIZE_T res = REAL(readlink)(path, buf, bufsiz);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
return res;
}
#define INIT_READLINK COMMON_INTERCEPT_FUNCTION(readlink)
#else
#define INIT_READLINK
#endif
#if SANITIZER_INTERCEPT_READLINKAT
INTERCEPTOR(SSIZE_T, readlinkat, int dirfd, const char *path, char *buf,
SIZE_T bufsiz) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, readlinkat, dirfd, path, buf, bufsiz);
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
SSIZE_T res = REAL(readlinkat)(dirfd, path, buf, bufsiz);
if (res > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, res);
return res;
}
#define INIT_READLINKAT COMMON_INTERCEPT_FUNCTION(readlinkat)
#else
#define INIT_READLINKAT
#endif
#if SANITIZER_INTERCEPT_NAME_TO_HANDLE_AT
INTERCEPTOR(int, name_to_handle_at, int dirfd, const char *pathname,
struct file_handle *handle, int *mount_id, int flags) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, name_to_handle_at, dirfd, pathname, handle,
mount_id, flags);
COMMON_INTERCEPTOR_READ_RANGE(ctx, pathname, internal_strlen(pathname) + 1);
__sanitizer_file_handle *sanitizer_handle =
reinterpret_cast<__sanitizer_file_handle*>(handle);
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->handle_bytes,
sizeof(sanitizer_handle->handle_bytes));
int res = REAL(name_to_handle_at)(dirfd, pathname, handle, mount_id, flags);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, &sanitizer_handle->handle_bytes,
sizeof(sanitizer_handle->handle_bytes));
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, &sanitizer_handle->handle_type,
sizeof(sanitizer_handle->handle_type));
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, &sanitizer_handle->f_handle, sanitizer_handle->handle_bytes);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mount_id, sizeof(*mount_id));
}
return res;
}
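// Recap of the in/out protocol mirrored above: the caller pre-sets
// handle->handle_bytes to the capacity of f_handle (read before the call);
// on success the kernel updates handle_bytes, sets handle_type and fills
// f_handle, so exactly those byte ranges (plus *mount_id) are unpoisoned
// afterwards.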
#define INIT_NAME_TO_HANDLE_AT COMMON_INTERCEPT_FUNCTION(name_to_handle_at)
#else
#define INIT_NAME_TO_HANDLE_AT
#endif
#if SANITIZER_INTERCEPT_OPEN_BY_HANDLE_AT
INTERCEPTOR(int, open_by_handle_at, int mount_fd, struct file_handle* handle,
int flags) {
void* ctx;
COMMON_INTERCEPTOR_ENTER(ctx, open_by_handle_at, mount_fd, handle, flags);
__sanitizer_file_handle *sanitizer_handle =
reinterpret_cast<__sanitizer_file_handle*>(handle);
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->handle_bytes,
sizeof(sanitizer_handle->handle_bytes));
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->handle_type,
sizeof(sanitizer_handle->handle_type));
COMMON_INTERCEPTOR_READ_RANGE(
ctx, &sanitizer_handle->f_handle, sanitizer_handle->handle_bytes);
return REAL(open_by_handle_at)(mount_fd, handle, flags);
}
#define INIT_OPEN_BY_HANDLE_AT COMMON_INTERCEPT_FUNCTION(open_by_handle_at)
#else
#define INIT_OPEN_BY_HANDLE_AT
#endif
#if SANITIZER_INTERCEPT_STRLCPY
INTERCEPTOR(SIZE_T, strlcpy, char *dst, char *src, SIZE_T size) {
void *ctx;
SIZE_T res;
COMMON_INTERCEPTOR_ENTER(ctx, strlcpy, dst, src, size);
if (src) {
// Keep the strnlen call inside the macro argument: if the macro ignores its
// arguments, strnlen is never evaluated.
COMMON_INTERCEPTOR_READ_STRING(
ctx, src, Min(internal_strnlen(src, size), size - 1) + 1);
}
res = REAL(strlcpy)(dst, src, size);
COMMON_INTERCEPTOR_COPY_STRING(ctx, dst, src, internal_strlen(dst) + 1);
return res;
}
INTERCEPTOR(SIZE_T, strlcat, char *dst, char *src, SIZE_T size) {
void *ctx;
SIZE_T len = 0;
COMMON_INTERCEPTOR_ENTER(ctx, strlcat, dst, src, size);
// src is checked in the strlcpy() interceptor
if (dst) {
len = internal_strnlen(dst, size);
COMMON_INTERCEPTOR_READ_STRING(ctx, dst, Min(len, size - 1) + 1);
}
// Reuse the rest of the code in the strlcpy() interceptor
return WRAP(strlcpy)(dst + len, src, size - len) + len;
}
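// Worked example: for dst == "ab" (len == 2) and size == 8, this calls
// WRAP(strlcpy)(dst + 2, src, 6), which returns strlen(src), so strlcat()
// reports strlen(src) + 2: the length of the string it tried to create,
// as the BSD API specifies.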
#define INIT_STRLCPY \
COMMON_INTERCEPT_FUNCTION(strlcpy); \
COMMON_INTERCEPT_FUNCTION(strlcat);
#else
#define INIT_STRLCPY
#endif
#if SANITIZER_INTERCEPT_MMAP
INTERCEPTOR(void *, mmap, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap, addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd, off);
}
INTERCEPTOR(int, mprotect, void *addr, SIZE_T sz, int prot) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot, 0);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (int)internal_mprotect(addr, sz, prot);
COMMON_INTERCEPTOR_ENTER(ctx, mprotect, addr, sz, prot);
MprotectMallocZones(addr, prot);
return REAL(mprotect)(addr, sz, prot);
}
#define INIT_MMAP \
COMMON_INTERCEPT_FUNCTION(mmap); \
COMMON_INTERCEPT_FUNCTION(mprotect);
#else
#define INIT_MMAP
#endif
#if SANITIZER_INTERCEPT_MMAP64
INTERCEPTOR(void *, mmap64, void *addr, SIZE_T sz, int prot, int flags, int fd,
OFF64_T off) {
void *ctx;
if (common_flags()->detect_write_exec)
ReportMmapWriteExec(prot, flags);
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return (void *)internal_mmap(addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_ENTER(ctx, mmap64, addr, sz, prot, flags, fd, off);
COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap64, addr, sz, prot, flags, fd, off);
}
#define INIT_MMAP64 COMMON_INTERCEPT_FUNCTION(mmap64);
#else
#define INIT_MMAP64
#endif
#if SANITIZER_INTERCEPT_DEVNAME
INTERCEPTOR(char *, devname, u64 dev, u32 type) {
void *ctx;
char *name;
COMMON_INTERCEPTOR_ENTER(ctx, devname, dev, type);
name = REAL(devname)(dev, type);
if (name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
return name;
}
#define INIT_DEVNAME COMMON_INTERCEPT_FUNCTION(devname);
#else
#define INIT_DEVNAME
#endif
#if SANITIZER_INTERCEPT_DEVNAME_R
#if SANITIZER_NETBSD
#define DEVNAME_R_RETTYPE int
#define DEVNAME_R_SUCCESS(x) (!(x))
#else
#define DEVNAME_R_RETTYPE char*
#define DEVNAME_R_SUCCESS(x) (x)
#endif
INTERCEPTOR(DEVNAME_R_RETTYPE, devname_r, u64 dev, u32 type, char *path,
uptr len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, devname_r, dev, type, path, len);
DEVNAME_R_RETTYPE res = REAL(devname_r)(dev, type, path, len);
if (DEVNAME_R_SUCCESS(res))
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, path, internal_strlen(path) + 1);
return res;
}
#define INIT_DEVNAME_R COMMON_INTERCEPT_FUNCTION(devname_r);
#else
#define INIT_DEVNAME_R
#endif
#if SANITIZER_INTERCEPT_FGETLN
INTERCEPTOR(char *, fgetln, __sanitizer_FILE *stream, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetln, stream, len);
char *str = REAL(fgetln)(stream, len);
if (str && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, str, *len);
}
return str;
}
#define INIT_FGETLN COMMON_INTERCEPT_FUNCTION(fgetln)
#else
#define INIT_FGETLN
#endif
#if SANITIZER_INTERCEPT_STRMODE
INTERCEPTOR(void, strmode, u32 mode, char *bp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strmode, mode, bp);
REAL(strmode)(mode, bp);
if (bp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, bp, internal_strlen(bp) + 1);
}
#define INIT_STRMODE COMMON_INTERCEPT_FUNCTION(strmode)
#else
#define INIT_STRMODE
#endif
#if SANITIZER_INTERCEPT_TTYENT
INTERCEPTOR(struct __sanitizer_ttyent *, getttyent, void) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getttyent);
struct __sanitizer_ttyent *ttyent = REAL(getttyent)();
if (ttyent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ttyent, struct_ttyent_sz);
return ttyent;
}
INTERCEPTOR(struct __sanitizer_ttyent *, getttynam, char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getttynam, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_ttyent *ttyent = REAL(getttynam)(name);
if (ttyent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ttyent, struct_ttyent_sz);
return ttyent;
}
#define INIT_TTYENT \
COMMON_INTERCEPT_FUNCTION(getttyent); \
COMMON_INTERCEPT_FUNCTION(getttynam);
#else
#define INIT_TTYENT
#endif
#if SANITIZER_INTERCEPT_TTYENTPATH
INTERCEPTOR(int, setttyentpath, char *path) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setttyentpath, path);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
return REAL(setttyentpath)(path);
}
#define INIT_TTYENTPATH COMMON_INTERCEPT_FUNCTION(setttyentpath);
#else
#define INIT_TTYENTPATH
#endif
#if SANITIZER_INTERCEPT_PROTOENT
static void write_protoent(void *ctx, struct __sanitizer_protoent *p) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p, sizeof(*p));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_name, internal_strlen(p->p_name) + 1);
SIZE_T pp_size = 1; // One extra slot for the terminating NULL pointer
for (char **pp = p->p_aliases; *pp; ++pp, ++pp_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *pp, internal_strlen(*pp) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, p->p_aliases,
pp_size * sizeof(char **));
}
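// p_aliases is a NULL-pointer-terminated array of C strings, so for an entry
// such as "tcp" it might look like { "TCP", NULL } and pp_size would be 2:
// one alias plus the terminating NULL pointer. write_protoent unpoisons each
// alias string first, then the pointer array itself.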
INTERCEPTOR(struct __sanitizer_protoent *, getprotoent) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotoent);
struct __sanitizer_protoent *p = REAL(getprotoent)();
if (p)
write_protoent(ctx, p);
return p;
}
INTERCEPTOR(struct __sanitizer_protoent *, getprotobyname, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobyname, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_protoent *p = REAL(getprotobyname)(name);
if (p)
write_protoent(ctx, p);
return p;
}
INTERCEPTOR(struct __sanitizer_protoent *, getprotobynumber, int proto) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobynumber, proto);
struct __sanitizer_protoent *p = REAL(getprotobynumber)(proto);
if (p)
write_protoent(ctx, p);
return p;
}
#define INIT_PROTOENT \
COMMON_INTERCEPT_FUNCTION(getprotoent); \
COMMON_INTERCEPT_FUNCTION(getprotobyname); \
COMMON_INTERCEPT_FUNCTION(getprotobynumber)
#else
#define INIT_PROTOENT
#endif
#if SANITIZER_INTERCEPT_PROTOENT_R
INTERCEPTOR(int, getprotoent_r, struct __sanitizer_protoent *result_buf,
char *buf, SIZE_T buflen, struct __sanitizer_protoent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotoent_r, result_buf, buf, buflen,
result);
int res = REAL(getprotoent_r)(result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
if (!res && *result)
write_protoent(ctx, *result);
return res;
}
INTERCEPTOR(int, getprotobyname_r, const char *name,
struct __sanitizer_protoent *result_buf, char *buf, SIZE_T buflen,
struct __sanitizer_protoent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobyname_r, name, result_buf, buf,
buflen, result);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
int res = REAL(getprotobyname_r)(name, result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
if (!res && *result)
write_protoent(ctx, *result);
return res;
}
INTERCEPTOR(int, getprotobynumber_r, int num,
struct __sanitizer_protoent *result_buf, char *buf,
SIZE_T buflen, struct __sanitizer_protoent **result) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getprotobynumber_r, num, result_buf, buf,
buflen, result);
int res = REAL(getprotobynumber_r)(num, result_buf, buf, buflen, result);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, result, sizeof *result);
if (!res && *result)
write_protoent(ctx, *result);
return res;
}
#define INIT_PROTOENT_R \
COMMON_INTERCEPT_FUNCTION(getprotoent_r); \
COMMON_INTERCEPT_FUNCTION(getprotobyname_r); \
COMMON_INTERCEPT_FUNCTION(getprotobynumber_r);
#else
#define INIT_PROTOENT_R
#endif
#if SANITIZER_INTERCEPT_NETENT
INTERCEPTOR(struct __sanitizer_netent *, getnetent) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetent);
struct __sanitizer_netent *n = REAL(getnetent)();
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One extra slot for the terminating NULL pointer
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
}
return n;
}
INTERCEPTOR(struct __sanitizer_netent *, getnetbyname, const char *name) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetbyname, name);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
struct __sanitizer_netent *n = REAL(getnetbyname)(name);
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One extra slot for the terminating NULL pointer
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
}
return n;
}
INTERCEPTOR(struct __sanitizer_netent *, getnetbyaddr, u32 net, int type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getnetbyaddr, net, type);
struct __sanitizer_netent *n = REAL(getnetbyaddr)(net, type);
if (n) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n, sizeof(*n));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_name, internal_strlen(n->n_name) + 1);
SIZE_T nn_size = 1; // One extra slot for the terminating NULL pointer
for (char **nn = n->n_aliases; *nn; ++nn, ++nn_size)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *nn, internal_strlen(*nn) + 1);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, n->n_aliases,
nn_size * sizeof(char **));
}
return n;
}
#define INIT_NETENT \
COMMON_INTERCEPT_FUNCTION(getnetent); \
COMMON_INTERCEPT_FUNCTION(getnetbyname); \
COMMON_INTERCEPT_FUNCTION(getnetbyaddr)
#else
#define INIT_NETENT
#endif
#if SANITIZER_INTERCEPT_GETMNTINFO
INTERCEPTOR(int, getmntinfo, void **mntbufp, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getmntinfo, mntbufp, flags);
int cnt = REAL(getmntinfo)(mntbufp, flags);
if (cnt > 0 && mntbufp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, mntbufp, sizeof(void *));
if (*mntbufp)
#if SANITIZER_NETBSD
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *mntbufp, cnt * struct_statvfs_sz);
#else
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *mntbufp, cnt * struct_statfs_sz);
#endif
}
return cnt;
}
#define INIT_GETMNTINFO COMMON_INTERCEPT_FUNCTION(getmntinfo)
#else
#define INIT_GETMNTINFO
#endif
#if SANITIZER_INTERCEPT_MI_VECTOR_HASH
INTERCEPTOR(void, mi_vector_hash, const void *key, SIZE_T len, u32 seed,
u32 hashes[3]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, mi_vector_hash, key, len, seed, hashes);
if (key)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, len);
REAL(mi_vector_hash)(key, len, seed, hashes);
if (hashes)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, hashes, sizeof(hashes[0]) * 3);
}
#define INIT_MI_VECTOR_HASH COMMON_INTERCEPT_FUNCTION(mi_vector_hash)
#else
#define INIT_MI_VECTOR_HASH
#endif
#if SANITIZER_INTERCEPT_SETVBUF
INTERCEPTOR(int, setvbuf, __sanitizer_FILE *stream, char *buf, int mode,
SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setvbuf, stream, buf, mode, size);
int ret = REAL(setvbuf)(stream, buf, mode, size);
if (buf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
if (stream)
unpoison_file(stream);
return ret;
}
INTERCEPTOR(void, setbuf, __sanitizer_FILE *stream, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setbuf, stream, buf);
REAL(setbuf)(stream, buf);
if (buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer_bufsiz);
}
if (stream)
unpoison_file(stream);
}
INTERCEPTOR(void, setbuffer, __sanitizer_FILE *stream, char *buf, SIZE_T size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setbuffer, stream, buf, size);
REAL(setbuffer)(stream, buf, size);
if (buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
}
if (stream)
unpoison_file(stream);
}
INTERCEPTOR(void, setlinebuf, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, setlinebuf, stream);
REAL(setlinebuf)(stream);
if (stream)
unpoison_file(stream);
}
#define INIT_SETVBUF COMMON_INTERCEPT_FUNCTION(setvbuf); \
COMMON_INTERCEPT_FUNCTION(setbuf); \
COMMON_INTERCEPT_FUNCTION(setbuffer); \
COMMON_INTERCEPT_FUNCTION(setlinebuf)
#else
#define INIT_SETVBUF
#endif
#if SANITIZER_INTERCEPT_GETVFSSTAT
INTERCEPTOR(int, getvfsstat, void *buf, SIZE_T bufsize, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getvfsstat, buf, bufsize, flags);
int ret = REAL(getvfsstat)(buf, bufsize, flags);
if (buf && ret > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, ret * struct_statvfs_sz);
return ret;
}
#define INIT_GETVFSSTAT COMMON_INTERCEPT_FUNCTION(getvfsstat)
#else
#define INIT_GETVFSSTAT
#endif
#if SANITIZER_INTERCEPT_REGEX
INTERCEPTOR(int, regcomp, void *preg, const char *pattern, int cflags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regcomp, preg, pattern, cflags);
if (pattern)
COMMON_INTERCEPTOR_READ_RANGE(ctx, pattern, internal_strlen(pattern) + 1);
int res = REAL(regcomp)(preg, pattern, cflags);
if (preg)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, preg, struct_regex_sz);
return res;
}
INTERCEPTOR(int, regexec, const void *preg, const char *string, SIZE_T nmatch,
struct __sanitizer_regmatch *pmatch[], int eflags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regexec, preg, string, nmatch, pmatch, eflags);
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
if (string)
COMMON_INTERCEPTOR_READ_RANGE(ctx, string, internal_strlen(string) + 1);
int res = REAL(regexec)(preg, string, nmatch, pmatch, eflags);
if (!res && pmatch)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pmatch, nmatch * struct_regmatch_sz);
return res;
}
INTERCEPTOR(SIZE_T, regerror, int errcode, const void *preg, char *errbuf,
SIZE_T errbuf_size) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regerror, errcode, preg, errbuf, errbuf_size);
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
SIZE_T res = REAL(regerror)(errcode, preg, errbuf, errbuf_size);
if (errbuf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errbuf, internal_strlen(errbuf) + 1);
return res;
}
INTERCEPTOR(void, regfree, const void *preg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regfree, preg);
if (preg)
COMMON_INTERCEPTOR_READ_RANGE(ctx, preg, struct_regex_sz);
REAL(regfree)(preg);
}
#define INIT_REGEX \
COMMON_INTERCEPT_FUNCTION(regcomp); \
COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(regexec, "GLIBC_2.3.4"); \
COMMON_INTERCEPT_FUNCTION(regerror); \
COMMON_INTERCEPT_FUNCTION(regfree);
#else
#define INIT_REGEX
#endif
#if SANITIZER_INTERCEPT_REGEXSUB
INTERCEPTOR(SSIZE_T, regnsub, char *buf, SIZE_T bufsiz, const char *sub,
const struct __sanitizer_regmatch *rm, const char *str) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regnsub, buf, bufsiz, sub, rm, str);
if (sub)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, internal_strlen(sub) + 1);
// The implementation requires and hardcodes an array of 10 elements
if (rm)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
if (str)
COMMON_INTERCEPTOR_READ_RANGE(ctx, str, internal_strlen(str) + 1);
SSIZE_T res = REAL(regnsub)(buf, bufsiz, sub, rm, str);
if (res > 0 && buf)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
return res;
}
INTERCEPTOR(SSIZE_T, regasub, char **buf, const char *sub,
const struct __sanitizer_regmatch *rm, const char *sstr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, regasub, buf, sub, rm, sstr);
if (sub)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sub, internal_strlen(sub) + 1);
// As in regnsub above, the implementation hardcodes an array of 10 elements
if (rm)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rm, 10 * struct_regmatch_sz);
if (sstr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sstr, internal_strlen(sstr) + 1);
SSIZE_T res = REAL(regasub)(buf, sub, rm, sstr);
if (res > 0 && buf) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, sizeof(char *));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *buf, internal_strlen(*buf) + 1);
}
return res;
}
#define INIT_REGEXSUB \
COMMON_INTERCEPT_FUNCTION(regnsub); \
COMMON_INTERCEPT_FUNCTION(regasub);
#else
#define INIT_REGEXSUB
#endif
#if SANITIZER_INTERCEPT_FTS
INTERCEPTOR(void *, fts_open, char *const *path_argv, int options,
int (*compar)(void **, void **)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_open, path_argv, options, compar);
if (path_argv) {
for (char *const *pa = path_argv; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
// TODO(kamil): handle compar callback
void *fts = REAL(fts_open)(path_argv, options, compar);
if (fts)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, fts, struct_FTS_sz);
return fts;
}
INTERCEPTOR(void *, fts_read, void *ftsp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_read, ftsp);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
void *ftsent = REAL(fts_read)(ftsp);
if (ftsent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ftsent, struct_FTSENT_sz);
return ftsent;
}
INTERCEPTOR(void *, fts_children, void *ftsp, int options) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_children, ftsp, options);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
void *ftsent = REAL(fts_children)(ftsp, options);
if (ftsent)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ftsent, struct_FTSENT_sz);
return ftsent;
}
INTERCEPTOR(int, fts_set, void *ftsp, void *f, int options) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_set, ftsp, f, options);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
if (f)
COMMON_INTERCEPTOR_READ_RANGE(ctx, f, struct_FTSENT_sz);
return REAL(fts_set)(ftsp, f, options);
}
INTERCEPTOR(int, fts_close, void *ftsp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fts_close, ftsp);
if (ftsp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ftsp, struct_FTS_sz);
return REAL(fts_close)(ftsp);
}
#define INIT_FTS \
COMMON_INTERCEPT_FUNCTION(fts_open); \
COMMON_INTERCEPT_FUNCTION(fts_read); \
COMMON_INTERCEPT_FUNCTION(fts_children); \
COMMON_INTERCEPT_FUNCTION(fts_set); \
COMMON_INTERCEPT_FUNCTION(fts_close);
#else
#define INIT_FTS
#endif
#if SANITIZER_INTERCEPT_SYSCTL
INTERCEPTOR(int, sysctl, int *name, unsigned int namelen, void *oldp,
SIZE_T *oldlenp, void *newp, SIZE_T newlen) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_sysctl(name, namelen, oldp, oldlenp, newp, newlen);
COMMON_INTERCEPTOR_ENTER(ctx, sysctl, name, namelen, oldp, oldlenp, newp,
newlen);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, namelen * sizeof(*name));
if (oldlenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (newp && newlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, newp, newlen);
int res = REAL(sysctl)(name, namelen, oldp, oldlenp, newp, newlen);
if (!res) {
if (oldlenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (oldp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldp, *oldlenp);
}
}
return res;
}
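// sysctl's oldlenp is an in/out parameter: on entry it holds the capacity of
// oldp, on successful return the number of bytes actually stored, which is
// why it is checked as a read before the call and marked written (together
// with *oldlenp bytes of oldp) afterwards. The classic two-step caller, for
// illustration:
//   SIZE_T sz = 0;
//   sysctl(mib, miblen, nullptr, &sz, nullptr, 0);  // 1. query the size
//   void *buf = malloc(sz);
//   sysctl(mib, miblen, buf, &sz, nullptr, 0);      // 2. fetch the data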
INTERCEPTOR(int, sysctlbyname, char *sname, void *oldp, SIZE_T *oldlenp,
void *newp, SIZE_T newlen) {
void *ctx;
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_sysctlbyname(sname, oldp, oldlenp, newp, newlen);
COMMON_INTERCEPTOR_ENTER(ctx, sysctlbyname, sname, oldp, oldlenp, newp,
newlen);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (oldlenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (newp && newlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, newp, newlen);
int res = REAL(sysctlbyname)(sname, oldp, oldlenp, newp, newlen);
if (!res) {
if (oldlenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldlenp, sizeof(*oldlenp));
if (oldp)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oldp, *oldlenp);
}
}
return res;
}
INTERCEPTOR(int, sysctlnametomib, const char *sname, int *name,
SIZE_T *namelenp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sysctlnametomib, sname, name, namelenp);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (namelenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, namelenp, sizeof(*namelenp));
int res = REAL(sysctlnametomib)(sname, name, namelenp);
if (!res) {
if (namelenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelenp, sizeof(*namelenp));
if (name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, *namelenp * sizeof(*name));
}
}
return res;
}
#define INIT_SYSCTL \
COMMON_INTERCEPT_FUNCTION(sysctl); \
COMMON_INTERCEPT_FUNCTION(sysctlbyname); \
COMMON_INTERCEPT_FUNCTION(sysctlnametomib);
#else
#define INIT_SYSCTL
#endif
#if SANITIZER_INTERCEPT_ASYSCTL
INTERCEPTOR(void *, asysctl, const int *name, SIZE_T namelen, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asysctl, name, namelen, len);
if (name)
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, sizeof(*name) * namelen);
void *res = REAL(asysctl)(name, namelen, len);
if (res && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, *len);
}
return res;
}
INTERCEPTOR(void *, asysctlbyname, const char *sname, SIZE_T *len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, asysctlbyname, sname, len);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
void *res = REAL(asysctlbyname)(sname, len);
if (res && len) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, *len);
}
return res;
}
#define INIT_ASYSCTL \
COMMON_INTERCEPT_FUNCTION(asysctl); \
COMMON_INTERCEPT_FUNCTION(asysctlbyname);
#else
#define INIT_ASYSCTL
#endif
#if SANITIZER_INTERCEPT_SYSCTLGETMIBINFO
INTERCEPTOR(int, sysctlgetmibinfo, char *sname, int *name,
unsigned int *namelenp, char *cname, SIZE_T *csz, void **rnode,
int v) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sysctlgetmibinfo, sname, name, namelenp, cname,
csz, rnode, v);
if (sname)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sname, internal_strlen(sname) + 1);
if (namelenp)
COMMON_INTERCEPTOR_READ_RANGE(ctx, namelenp, sizeof(*namelenp));
if (csz)
COMMON_INTERCEPTOR_READ_RANGE(ctx, csz, sizeof(*csz));
// Skip rnode: it is mostly used internally, rarely by callers, and is not
// trivial to sanitize
int res = REAL(sysctlgetmibinfo)(sname, name, namelenp, cname, csz, rnode, v);
if (!res) {
if (namelenp) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, namelenp, sizeof(*namelenp));
if (name)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, *namelenp * sizeof(*name));
}
if (csz) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, csz, sizeof(*csz));
if (cname)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cname, *csz);
}
}
return res;
}
#define INIT_SYSCTLGETMIBINFO \
COMMON_INTERCEPT_FUNCTION(sysctlgetmibinfo);
#else
#define INIT_SYSCTLGETMIBINFO
#endif
#if SANITIZER_INTERCEPT_NL_LANGINFO
INTERCEPTOR(char *, nl_langinfo, long item) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, nl_langinfo, item);
char *ret = REAL(nl_langinfo)(item);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, internal_strlen(ret) + 1);
return ret;
}
#define INIT_NL_LANGINFO COMMON_INTERCEPT_FUNCTION(nl_langinfo)
#else
#define INIT_NL_LANGINFO
#endif
#if SANITIZER_INTERCEPT_MODCTL
INTERCEPTOR(int, modctl, int operation, void *argp) {
void *ctx;
int ret;
COMMON_INTERCEPTOR_ENTER(ctx, modctl, operation, argp);
if (operation == modctl_load) {
if (argp) {
__sanitizer_modctl_load_t *ml = (__sanitizer_modctl_load_t *)argp;
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml, sizeof(*ml));
if (ml->ml_filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml->ml_filename,
internal_strlen(ml->ml_filename) + 1);
if (ml->ml_props)
COMMON_INTERCEPTOR_READ_RANGE(ctx, ml->ml_props, ml->ml_propslen);
}
ret = REAL(modctl)(operation, argp);
} else if (operation == modctl_unload) {
if (argp) {
const char *name = (const char *)argp;
COMMON_INTERCEPTOR_READ_RANGE(ctx, name, internal_strlen(name) + 1);
}
ret = REAL(modctl)(operation, argp);
} else if (operation == modctl_stat) {
uptr iov_len;
struct __sanitizer_iovec *iov = (struct __sanitizer_iovec *)argp;
if (iov) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, iov, sizeof(*iov));
iov_len = iov->iov_len;
}
ret = REAL(modctl)(operation, argp);
if (iov)
COMMON_INTERCEPTOR_WRITE_RANGE(
ctx, iov->iov_base, Min(iov_len, iov->iov_len));
} else {
// modctl_exists and any other operation pass no user memory to check.
ret = REAL(modctl)(operation, argp);
}
return ret;
}
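// In the modctl_stat branch above, iov->iov_len is captured into iov_len
// before the call because the kernel may overwrite it with the size it
// actually needed; only Min(iov_len, iov->iov_len) bytes of iov_base are
// then marked written -- the caller-provided capacity or the kernel-reported
// length, whichever is smaller.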
#define INIT_MODCTL COMMON_INTERCEPT_FUNCTION(modctl)
#else
#define INIT_MODCTL
#endif
#if SANITIZER_INTERCEPT_STRTONUM
INTERCEPTOR(long long, strtonum, const char *nptr, long long minval,
long long maxval, const char **errstr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtonum, nptr, minval, maxval, errstr);
// TODO(kamil): Implement strtoll as a common interceptor
char *real_endptr;
long long ret = (long long)REAL(strtoimax)(nptr, &real_endptr, 10);
StrtolFixAndCheck(ctx, nptr, nullptr, real_endptr, 10);
ret = REAL(strtonum)(nptr, minval, maxval, errstr);
if (errstr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, errstr, sizeof(const char *));
if (*errstr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *errstr, internal_strlen(*errstr) + 1);
}
return ret;
}
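// strtonum exposes no endptr, so the interceptor first runs REAL(strtoimax)
// purely to learn where parsing stops; StrtolFixAndCheck then uses that end
// pointer to mark the consumed prefix of nptr as read. The result itself is
// recomputed by the real strtonum, so the extra parse only affects
// instrumentation, never the returned value.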
#define INIT_STRTONUM COMMON_INTERCEPT_FUNCTION(strtonum)
#else
#define INIT_STRTONUM
#endif
#if SANITIZER_INTERCEPT_FPARSELN
INTERCEPTOR(char *, fparseln, __sanitizer_FILE *stream, SIZE_T *len,
SIZE_T *lineno, const char delim[3], int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fparseln, stream, len, lineno, delim, flags);
if (lineno)
COMMON_INTERCEPTOR_READ_RANGE(ctx, lineno, sizeof(*lineno));
if (delim)
COMMON_INTERCEPTOR_READ_RANGE(ctx, delim, sizeof(delim[0]) * 3);
char *ret = REAL(fparseln)(stream, len, lineno, delim, flags);
if (ret) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, internal_strlen(ret) + 1);
if (len)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, len, sizeof(*len));
if (lineno)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, lineno, sizeof(*lineno));
}
return ret;
}
#define INIT_FPARSELN COMMON_INTERCEPT_FUNCTION(fparseln)
#else
#define INIT_FPARSELN
#endif
#if SANITIZER_INTERCEPT_STATVFS1
INTERCEPTOR(int, statvfs1, const char *path, void *buf, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, statvfs1, path, buf, flags);
if (path) COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
int res = REAL(statvfs1)(path, buf, flags);
if (!res) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
return res;
}
INTERCEPTOR(int, fstatvfs1, int fd, void *buf, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fstatvfs1, fd, buf, flags);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
int res = REAL(fstatvfs1)(fd, buf, flags);
if (!res) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, struct_statvfs_sz);
if (fd >= 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return res;
}
#define INIT_STATVFS1 \
COMMON_INTERCEPT_FUNCTION(statvfs1); \
COMMON_INTERCEPT_FUNCTION(fstatvfs1);
#else
#define INIT_STATVFS1
#endif
#if SANITIZER_INTERCEPT_STRTOI
INTERCEPTOR(INTMAX_T, strtoi, const char *nptr, char **endptr, int base,
INTMAX_T low, INTMAX_T high, int *rstatus) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtoi, nptr, endptr, base, low, high, rstatus);
char *real_endptr;
INTMAX_T ret = REAL(strtoi)(nptr, &real_endptr, base, low, high, rstatus);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
if (rstatus)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rstatus, sizeof(*rstatus));
return ret;
}
INTERCEPTOR(UINTMAX_T, strtou, const char *nptr, char **endptr, int base,
UINTMAX_T low, UINTMAX_T high, int *rstatus) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strtou, nptr, endptr, base, low, high, rstatus);
char *real_endptr;
UINTMAX_T ret = REAL(strtou)(nptr, &real_endptr, base, low, high, rstatus);
StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base);
if (rstatus)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rstatus, sizeof(*rstatus));
return ret;
}
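// Both interceptors parse through a local real_endptr so that
// StrtolFixAndCheck can check the consumed prefix of nptr and propagate the
// end pointer to the caller's endptr when one was supplied. A hypothetical
// caller, for illustration:
//   int status;
//   char *end;
//   INTMAX_T v = strtoi("42abc", &end, 10, 0, 100, &status);  // end -> "abc"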
#define INIT_STRTOI \
COMMON_INTERCEPT_FUNCTION(strtoi); \
COMMON_INTERCEPT_FUNCTION(strtou)
#else
#define INIT_STRTOI
#endif
#if SANITIZER_INTERCEPT_CAPSICUM
#define CAP_RIGHTS_INIT_INTERCEPTOR(cap_rights_init, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_init, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
__sanitizer_cap_rights_t *ret = \
REAL(cap_rights_init)(rights, ##__VA_ARGS__); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret)); \
return ret; \
}
#define CAP_RIGHTS_SET_INTERCEPTOR(cap_rights_set, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_set, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
__sanitizer_cap_rights_t *ret = \
REAL(cap_rights_set)(rights, ##__VA_ARGS__); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret)); \
return ret; \
}
#define CAP_RIGHTS_CLEAR_INTERCEPTOR(cap_rights_clear, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_clear, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
__sanitizer_cap_rights_t *ret = \
REAL(cap_rights_clear)(rights, ##__VA_ARGS__); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret)); \
return ret; \
}
#define CAP_RIGHTS_IS_SET_INTERCEPTOR(cap_rights_is_set, rights, ...) \
{ \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_is_set, rights, ##__VA_ARGS__); \
if (rights) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights)); \
return REAL(cap_rights_is_set)(rights, ##__VA_ARGS__); \
}
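// The four macro bodies above share one shape: check the rights object as a
// read, call the real function, and (for the mutating variants) mark the
// returned rights object as written. Passing the function name plus
// ##__VA_ARGS__ lets the same body serve both the fixed-arity wrappers below
// and any variadic cap_rights_* entry points.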
INTERCEPTOR(__sanitizer_cap_rights_t *, cap_rights_init,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_INIT_INTERCEPTOR(cap_rights_init, rights);
}
INTERCEPTOR(__sanitizer_cap_rights_t *, cap_rights_set,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_SET_INTERCEPTOR(cap_rights_set, rights);
}
INTERCEPTOR(__sanitizer_cap_rights_t *, cap_rights_clear,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_CLEAR_INTERCEPTOR(cap_rights_clear, rights);
}
INTERCEPTOR(bool, cap_rights_is_set,
__sanitizer_cap_rights_t *rights) {
CAP_RIGHTS_IS_SET_INTERCEPTOR(cap_rights_is_set, rights);
}
INTERCEPTOR(int, cap_rights_limit, int fd,
const __sanitizer_cap_rights_t *rights) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_limit, fd, rights);
if (rights)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights));
return REAL(cap_rights_limit)(fd, rights);
}
INTERCEPTOR(int, cap_rights_get, int fd, __sanitizer_cap_rights_t *rights) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_get, fd, rights);
int ret = REAL(cap_rights_get)(fd, rights);
if (!ret && rights)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, rights, sizeof(*rights));
return ret;
}
INTERCEPTOR(bool, cap_rights_is_valid, const __sanitizer_cap_rights_t *rights) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_is_valid, rights);
if (rights)
COMMON_INTERCEPTOR_READ_RANGE(ctx, rights, sizeof(*rights));
return REAL(cap_rights_is_valid)(rights);
}
INTERCEPTOR(__sanitizer_cap_rights *, cap_rights_merge,
__sanitizer_cap_rights *dst, const __sanitizer_cap_rights *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_merge, dst, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
__sanitizer_cap_rights *ret = REAL(cap_rights_merge)(dst, src);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return ret;
}
INTERCEPTOR(__sanitizer_cap_rights *, cap_rights_remove,
__sanitizer_cap_rights *dst, const __sanitizer_cap_rights *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_remove, dst, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, sizeof(*src));
__sanitizer_cap_rights *ret = REAL(cap_rights_remove)(dst, src);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst));
return ret;
}
INTERCEPTOR(bool, cap_rights_contains, const __sanitizer_cap_rights *big,
const __sanitizer_cap_rights *little) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_rights_contains, big, little);
if (little)
COMMON_INTERCEPTOR_READ_RANGE(ctx, little, sizeof(*little));
if (big)
COMMON_INTERCEPTOR_READ_RANGE(ctx, big, sizeof(*big));
return REAL(cap_rights_contains)(big, little);
}
INTERCEPTOR(int, cap_ioctls_limit, int fd, const uptr *cmds, SIZE_T ncmds) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_ioctls_limit, fd, cmds, ncmds);
if (cmds)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cmds, sizeof(*cmds) * ncmds);
return REAL(cap_ioctls_limit)(fd, cmds, ncmds);
}
INTERCEPTOR(int, cap_ioctls_get, int fd, uptr *cmds, SIZE_T maxcmds) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cap_ioctls_get, fd, cmds, maxcmds);
int ret = REAL(cap_ioctls_get)(fd, cmds, maxcmds);
if (!ret && cmds)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cmds, sizeof(*cmds) * maxcmds);
return ret;
}
#define INIT_CAPSICUM \
COMMON_INTERCEPT_FUNCTION(cap_rights_init); \
COMMON_INTERCEPT_FUNCTION(cap_rights_set); \
COMMON_INTERCEPT_FUNCTION(cap_rights_clear); \
COMMON_INTERCEPT_FUNCTION(cap_rights_is_set); \
COMMON_INTERCEPT_FUNCTION(cap_rights_get); \
COMMON_INTERCEPT_FUNCTION(cap_rights_limit); \
COMMON_INTERCEPT_FUNCTION(cap_rights_contains); \
COMMON_INTERCEPT_FUNCTION(cap_rights_remove); \
COMMON_INTERCEPT_FUNCTION(cap_rights_merge); \
COMMON_INTERCEPT_FUNCTION(cap_rights_is_valid); \
COMMON_INTERCEPT_FUNCTION(cap_ioctls_get); \
COMMON_INTERCEPT_FUNCTION(cap_ioctls_limit)
#else
#define INIT_CAPSICUM
#endif
#if SANITIZER_INTERCEPT_SHA1
INTERCEPTOR(void, SHA1Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Init, context);
REAL(SHA1Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA1_CTX_sz);
}
INTERCEPTOR(void, SHA1Update, void *context, const u8 *data, unsigned len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA1_CTX_sz);
REAL(SHA1Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA1_CTX_sz);
}
INTERCEPTOR(void, SHA1Final, u8 digest[20], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA1_CTX_sz);
REAL(SHA1Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(u8) * 20);
}
INTERCEPTOR(void, SHA1Transform, u32 state[5], u8 buffer[64]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Transform, state, buffer);
if (state)
COMMON_INTERCEPTOR_READ_RANGE(ctx, state, sizeof(u32) * 5);
if (buffer)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, sizeof(u8) * 64);
REAL(SHA1Transform)(state, buffer);
if (state)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, state, sizeof(u32) * 5);
}
INTERCEPTOR(char *, SHA1End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA1_CTX_sz);
char *ret = REAL(SHA1End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
INTERCEPTOR(char *, SHA1File, char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(SHA1File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
INTERCEPTOR(char *, SHA1FileChunk, char *filename, char *buf, OFF_T offset,
OFF_T length) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1FileChunk, filename, buf, offset, length);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(SHA1FileChunk)(filename, buf, offset, length);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
INTERCEPTOR(char *, SHA1Data, u8 *data, SIZE_T len, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, SHA1Data, data, len, buf);
if (data)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(SHA1Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA1_return_length);
return ret;
}
#define INIT_SHA1 \
COMMON_INTERCEPT_FUNCTION(SHA1Init); \
COMMON_INTERCEPT_FUNCTION(SHA1Update); \
COMMON_INTERCEPT_FUNCTION(SHA1Final); \
COMMON_INTERCEPT_FUNCTION(SHA1Transform); \
COMMON_INTERCEPT_FUNCTION(SHA1End); \
COMMON_INTERCEPT_FUNCTION(SHA1File); \
COMMON_INTERCEPT_FUNCTION(SHA1FileChunk); \
COMMON_INTERCEPT_FUNCTION(SHA1Data)
#else
#define INIT_SHA1
#endif
#if SANITIZER_INTERCEPT_MD4
INTERCEPTOR(void, MD4Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Init, context);
REAL(MD4Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD4_CTX_sz);
}
INTERCEPTOR(void, MD4Update, void *context, const unsigned char *data,
unsigned int len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD4_CTX_sz);
REAL(MD4Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD4_CTX_sz);
}
INTERCEPTOR(void, MD4Final, unsigned char digest[16], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD4_CTX_sz);
REAL(MD4Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
}
INTERCEPTOR(char *, MD4End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD4_CTX_sz);
char *ret = REAL(MD4End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
return ret;
}
INTERCEPTOR(char *, MD4File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD4File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
return ret;
}
INTERCEPTOR(char *, MD4Data, const unsigned char *data, unsigned int len,
char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD4Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(MD4Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD4_return_length);
return ret;
}
#define INIT_MD4 \
COMMON_INTERCEPT_FUNCTION(MD4Init); \
COMMON_INTERCEPT_FUNCTION(MD4Update); \
COMMON_INTERCEPT_FUNCTION(MD4Final); \
COMMON_INTERCEPT_FUNCTION(MD4End); \
COMMON_INTERCEPT_FUNCTION(MD4File); \
COMMON_INTERCEPT_FUNCTION(MD4Data)
#else
#define INIT_MD4
#endif
#if SANITIZER_INTERCEPT_RMD160
INTERCEPTOR(void, RMD160Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Init, context);
REAL(RMD160Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, RMD160_CTX_sz);
}
INTERCEPTOR(void, RMD160Update, void *context, const u8 *data, unsigned len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, RMD160_CTX_sz);
REAL(RMD160Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, RMD160_CTX_sz);
}
INTERCEPTOR(void, RMD160Final, u8 digest[20], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, RMD160_CTX_sz);
REAL(RMD160Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(u8) * 20);
}
INTERCEPTOR(void, RMD160Transform, u32 state[5], u16 buffer[16]) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Transform, state, buffer);
if (state)
COMMON_INTERCEPTOR_READ_RANGE(ctx, state, sizeof(u32) * 5);
if (buffer)
COMMON_INTERCEPTOR_READ_RANGE(ctx, buffer, sizeof(u32) * 16);
REAL(RMD160Transform)(state, buffer);
if (state)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, state, sizeof(u32) * 5);
}
INTERCEPTOR(char *, RMD160End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, RMD160_CTX_sz);
char *ret = REAL(RMD160End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
INTERCEPTOR(char *, RMD160File, char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(RMD160File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
INTERCEPTOR(char *, RMD160FileChunk, char *filename, char *buf, OFF_T offset,
OFF_T length) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160FileChunk, filename, buf, offset, length);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(RMD160FileChunk)(filename, buf, offset, length);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
INTERCEPTOR(char *, RMD160Data, u8 *data, SIZE_T len, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, RMD160Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(RMD160Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, RMD160_return_length);
return ret;
}
#define INIT_RMD160 \
COMMON_INTERCEPT_FUNCTION(RMD160Init); \
COMMON_INTERCEPT_FUNCTION(RMD160Update); \
COMMON_INTERCEPT_FUNCTION(RMD160Final); \
COMMON_INTERCEPT_FUNCTION(RMD160Transform); \
COMMON_INTERCEPT_FUNCTION(RMD160End); \
COMMON_INTERCEPT_FUNCTION(RMD160File); \
COMMON_INTERCEPT_FUNCTION(RMD160FileChunk); \
COMMON_INTERCEPT_FUNCTION(RMD160Data)
#else
#define INIT_RMD160
#endif
#if SANITIZER_INTERCEPT_MD5
INTERCEPTOR(void, MD5Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Init, context);
REAL(MD5Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
}
INTERCEPTOR(void, MD5Update, void *context, const unsigned char *data,
unsigned int len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
REAL(MD5Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD5_CTX_sz);
}
INTERCEPTOR(void, MD5Final, unsigned char digest[16], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
REAL(MD5Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
}
INTERCEPTOR(char *, MD5End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD5_CTX_sz);
char *ret = REAL(MD5End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
return ret;
}
INTERCEPTOR(char *, MD5File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD5File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
return ret;
}
INTERCEPTOR(char *, MD5Data, const unsigned char *data, unsigned int len,
char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD5Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(MD5Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD5_return_length);
return ret;
}
#define INIT_MD5 \
COMMON_INTERCEPT_FUNCTION(MD5Init); \
COMMON_INTERCEPT_FUNCTION(MD5Update); \
COMMON_INTERCEPT_FUNCTION(MD5Final); \
COMMON_INTERCEPT_FUNCTION(MD5End); \
COMMON_INTERCEPT_FUNCTION(MD5File); \
COMMON_INTERCEPT_FUNCTION(MD5Data)
#else
#define INIT_MD5
#endif
#if SANITIZER_INTERCEPT_FSEEK
INTERCEPTOR(int, fseek, __sanitizer_FILE *stream, long int offset, int whence) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fseek, stream, offset, whence);
return REAL(fseek)(stream, offset, whence);
}
INTERCEPTOR(int, fseeko, __sanitizer_FILE *stream, OFF_T offset, int whence) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fseeko, stream, offset, whence);
return REAL(fseeko)(stream, offset, whence);
}
INTERCEPTOR(long int, ftell, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ftell, stream);
return REAL(ftell)(stream);
}
INTERCEPTOR(OFF_T, ftello, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, ftello, stream);
return REAL(ftello)(stream);
}
INTERCEPTOR(void, rewind, __sanitizer_FILE *stream) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, rewind, stream);
return REAL(rewind)(stream);
}
INTERCEPTOR(int, fgetpos, __sanitizer_FILE *stream, void *pos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fgetpos, stream, pos);
int ret = REAL(fgetpos)(stream, pos);
if (pos && !ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, pos, fpos_t_sz);
return ret;
}
INTERCEPTOR(int, fsetpos, __sanitizer_FILE *stream, const void *pos) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fsetpos, stream, pos);
if (pos)
COMMON_INTERCEPTOR_READ_RANGE(ctx, pos, fpos_t_sz);
return REAL(fsetpos)(stream, pos);
}
#define INIT_FSEEK \
COMMON_INTERCEPT_FUNCTION(fseek); \
COMMON_INTERCEPT_FUNCTION(fseeko); \
COMMON_INTERCEPT_FUNCTION(ftell); \
COMMON_INTERCEPT_FUNCTION(ftello); \
COMMON_INTERCEPT_FUNCTION(rewind); \
COMMON_INTERCEPT_FUNCTION(fgetpos); \
COMMON_INTERCEPT_FUNCTION(fsetpos)
#else
#define INIT_FSEEK
#endif
#if SANITIZER_INTERCEPT_MD2
INTERCEPTOR(void, MD2Init, void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Init, context);
REAL(MD2Init)(context);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD2_CTX_sz);
}
INTERCEPTOR(void, MD2Update, void *context, const unsigned char *data,
unsigned int len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Update, context, data, len);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD2_CTX_sz);
REAL(MD2Update)(context, data, len);
if (context)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, MD2_CTX_sz);
}
INTERCEPTOR(void, MD2Final, unsigned char digest[16], void *context) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Final, digest, context);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD2_CTX_sz);
REAL(MD2Final)(digest, context);
if (digest)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, sizeof(unsigned char) * 16);
}
INTERCEPTOR(char *, MD2End, void *context, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2End, context, buf);
if (context)
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, MD2_CTX_sz);
char *ret = REAL(MD2End)(context, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
return ret;
}
INTERCEPTOR(char *, MD2File, const char *filename, char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2File, filename, buf);
if (filename)
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);
char *ret = REAL(MD2File)(filename, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
return ret;
}
INTERCEPTOR(char *, MD2Data, const unsigned char *data, unsigned int len,
char *buf) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, MD2Data, data, len, buf);
if (data && len > 0)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len);
char *ret = REAL(MD2Data)(data, len, buf);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, MD2_return_length);
return ret;
}
#define INIT_MD2 \
COMMON_INTERCEPT_FUNCTION(MD2Init); \
COMMON_INTERCEPT_FUNCTION(MD2Update); \
COMMON_INTERCEPT_FUNCTION(MD2Final); \
COMMON_INTERCEPT_FUNCTION(MD2End); \
COMMON_INTERCEPT_FUNCTION(MD2File); \
COMMON_INTERCEPT_FUNCTION(MD2Data)
#else
#define INIT_MD2
#endif
#if SANITIZER_INTERCEPT_SHA2
#define SHA2_INTERCEPTORS(LEN, SHA2_STATE_T) \
INTERCEPTOR(void, SHA##LEN##_Init, void *context) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Init, context); \
REAL(SHA##LEN##_Init)(context); \
if (context) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
} \
INTERCEPTOR(void, SHA##LEN##_Update, void *context, \
const u8 *data, SIZE_T len) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Update, context, data, len); \
if (data && len > 0) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
if (context) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
REAL(SHA##LEN##_Update)(context, data, len); \
if (context) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
} \
INTERCEPTOR(void, SHA##LEN##_Final, u8 digest[LEN/8], \
void *context) { \
void *ctx; \
CHECK_EQ(SHA##LEN##_digest_length, LEN/8); \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Final, digest, context); \
if (context) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
REAL(SHA##LEN##_Final)(digest, context); \
if (digest) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, digest, \
sizeof(digest[0]) * \
SHA##LEN##_digest_length); \
} \
INTERCEPTOR(char *, SHA##LEN##_End, void *context, char *buf) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_End, context, buf); \
if (context) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, context, SHA##LEN##_CTX_sz); \
char *ret = REAL(SHA##LEN##_End)(context, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
} \
INTERCEPTOR(char *, SHA##LEN##_File, const char *filename, char *buf) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_File, filename, buf); \
if (filename) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
char *ret = REAL(SHA##LEN##_File)(filename, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
} \
INTERCEPTOR(char *, SHA##LEN##_FileChunk, const char *filename, char *buf, \
OFF_T offset, OFF_T length) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_FileChunk, filename, buf, offset, \
length); \
if (filename) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, filename, internal_strlen(filename) + 1);\
char *ret = REAL(SHA##LEN##_FileChunk)(filename, buf, offset, length); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
} \
INTERCEPTOR(char *, SHA##LEN##_Data, u8 *data, SIZE_T len, char *buf) { \
void *ctx; \
COMMON_INTERCEPTOR_ENTER(ctx, SHA##LEN##_Data, data, len, buf); \
if (data && len > 0) \
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, len); \
char *ret = REAL(SHA##LEN##_Data)(data, len, buf); \
if (ret) \
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, SHA##LEN##_return_length); \
return ret; \
}
SHA2_INTERCEPTORS(224, u32)
SHA2_INTERCEPTORS(256, u32)
SHA2_INTERCEPTORS(384, u64)
SHA2_INTERCEPTORS(512, u64)
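// Each SHA2_INTERCEPTORS(LEN, ...) expansion token-pastes LEN into the
// function names: SHA2_INTERCEPTORS(256, u32) defines SHA256_Init,
// SHA256_Update, SHA256_Final, SHA256_End, SHA256_File, SHA256_FileChunk and
// SHA256_Data, sized via the matching SHA256_CTX_sz and SHA256_return_length
// constants. (The SHA2_STATE_T parameter is unused in the body above.)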
#define INIT_SHA2_INTERCEPTORS(LEN) \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Init); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Update); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Final); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_End); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_File); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_FileChunk); \
COMMON_INTERCEPT_FUNCTION(SHA##LEN##_Data)
#define INIT_SHA2 \
INIT_SHA2_INTERCEPTORS(224); \
INIT_SHA2_INTERCEPTORS(256); \
INIT_SHA2_INTERCEPTORS(384); \
INIT_SHA2_INTERCEPTORS(512)
#undef SHA2_INTERCEPTORS
#else
#define INIT_SHA2
#endif
#if SANITIZER_INTERCEPT_VIS
INTERCEPTOR(char *, vis, char *dst, int c, int flag, int nextc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, vis, dst, c, flag, nextc);
char *end = REAL(vis)(dst, c, flag, nextc);
// dst is NUL-terminated and end points at the terminating NUL character
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
return end;
}
INTERCEPTOR(char *, nvis, char *dst, SIZE_T dlen, int c, int flag, int nextc) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, nvis, dst, dlen, c, flag, nextc);
char *end = REAL(nvis)(dst, dlen, c, flag, nextc);
// nvis does not guarantee that dst is NUL-terminated
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
return end;
}
INTERCEPTOR(int, strvis, char *dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strvis, dst, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(strvis)(dst, src, flag);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, stravis, char **dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, stravis, dst, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(stravis)(dst, src, flag);
if (dst) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(char *));
if (*dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *dst, len + 1);
}
return len;
}
INTERCEPTOR(int, strnvis, char *dst, SIZE_T dlen, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnvis, dst, dlen, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int len = REAL(strnvis)(dst, dlen, src, flag);
// The return value is valid even if there was no room for the terminating NUL
if (dst && len > 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, strvisx, char *dst, const char *src, SIZE_T len, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strvisx, dst, src, len, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
int ret = REAL(strvisx)(dst, src, len, flag);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strnvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnvisx, dst, dlen, src, len, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
int ret = REAL(strnvisx)(dst, dlen, src, len, flag);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strenvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
int flag, int *cerr_ptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strenvisx, dst, dlen, src, len, flag, cerr_ptr);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
// FIXME: according to the implementation, cerr_ptr only needs to be checked
// when the VIS_NOLOCALE bit is not set in flag
if (cerr_ptr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cerr_ptr, sizeof(int));
int ret = REAL(strenvisx)(dst, dlen, src, len, flag, cerr_ptr);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
if (cerr_ptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cerr_ptr, sizeof(int));
return ret;
}
INTERCEPTOR(char *, svis, char *dst, int c, int flag, int nextc,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, svis, dst, c, flag, nextc, extra);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
char *end = REAL(svis)(dst, c, flag, nextc, extra);
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, end - dst + 1);
return end;
}
INTERCEPTOR(char *, snvis, char *dst, SIZE_T dlen, int c, int flag, int nextc,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, snvis, dst, dlen, c, flag, nextc, extra);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
char *end = REAL(snvis)(dst, dlen, c, flag, nextc, extra);
if (dst && end)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst,
Min((SIZE_T)(end - dst + 1), dlen));
return end;
}
INTERCEPTOR(int, strsvis, char *dst, const char *src, int flag,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsvis, dst, src, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int len = REAL(strsvis)(dst, src, flag, extra);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, strsnvis, char *dst, SIZE_T dlen, const char *src, int flag,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsnvis, dst, dlen, src, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int len = REAL(strsnvis)(dst, dlen, src, flag, extra);
// The return value is valid even if there was no room for the terminating NUL
if (dst && len >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, len + 1);
return len;
}
INTERCEPTOR(int, strsvisx, char *dst, const char *src, SIZE_T len, int flag,
const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsvisx, dst, src, len, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int ret = REAL(strsvisx)(dst, src, len, flag, extra);
if (dst)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strsnvisx, char *dst, SIZE_T dlen, const char *src, SIZE_T len,
int flag, const char *extra) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsnvisx, dst, dlen, src, len, flag, extra);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
int ret = REAL(strsnvisx)(dst, dlen, src, len, flag, extra);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strsenvisx, char *dst, SIZE_T dlen, const char *src,
SIZE_T len, int flag, const char *extra, int *cerr_ptr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strsenvisx, dst, dlen, src, len, flag, extra,
cerr_ptr);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, len);
if (extra)
COMMON_INTERCEPTOR_READ_RANGE(ctx, extra, internal_strlen(extra) + 1);
// FIXME: according to the implementation, cerr_ptr only needs to be checked
// when the VIS_NOLOCALE bit is not set in flag
if (cerr_ptr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cerr_ptr, sizeof(int));
int ret = REAL(strsenvisx)(dst, dlen, src, len, flag, extra, cerr_ptr);
if (dst && ret >= 0)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
if (cerr_ptr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cerr_ptr, sizeof(int));
return ret;
}
INTERCEPTOR(int, unvis, char *cp, int c, int *astate, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, unvis, cp, c, astate, flag);
if (astate)
COMMON_INTERCEPTOR_READ_RANGE(ctx, astate, sizeof(*astate));
int ret = REAL(unvis)(cp, c, astate, flag);
if (ret == unvis_valid || ret == unvis_validpush) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cp, sizeof(*cp));
}
return ret;
}
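// unvis is a byte-at-a-time decoder: astate carries the state machine across
// calls, and only the unvis_valid / unvis_validpush results mean a decoded
// byte was stored in *cp, which is why the write is recorded for those two
// return codes only. The canonical loop, sketched for illustration:
//   int state = 0;
//   char out;
//   for (const char *p = in; *p; ++p)
//     if (unvis(&out, *p, &state, 0) == UNVIS_VALID)
//       consume(out);  // consume() is a hypothetical callback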
INTERCEPTOR(int, strunvis, char *dst, const char *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strunvis, dst, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strunvis)(dst, src);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strnunvis, char *dst, SIZE_T dlen, const char *src) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnunvis, dst, dlen, src);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strnunvis)(dst, dlen, src);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strunvisx, char *dst, const char *src, int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strunvisx, dst, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strunvisx)(dst, src, flag);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
INTERCEPTOR(int, strnunvisx, char *dst, SIZE_T dlen, const char *src,
int flag) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, strnunvisx, dst, dlen, src, flag);
if (src)
COMMON_INTERCEPTOR_READ_RANGE(ctx, src, internal_strlen(src) + 1);
int ret = REAL(strnunvisx)(dst, dlen, src, flag);
if (ret != -1)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, ret + 1);
return ret;
}
#define INIT_VIS \
COMMON_INTERCEPT_FUNCTION(vis); \
COMMON_INTERCEPT_FUNCTION(nvis); \
COMMON_INTERCEPT_FUNCTION(strvis); \
COMMON_INTERCEPT_FUNCTION(stravis); \
COMMON_INTERCEPT_FUNCTION(strnvis); \
COMMON_INTERCEPT_FUNCTION(strvisx); \
COMMON_INTERCEPT_FUNCTION(strnvisx); \
COMMON_INTERCEPT_FUNCTION(strenvisx); \
COMMON_INTERCEPT_FUNCTION(svis); \
COMMON_INTERCEPT_FUNCTION(snvis); \
COMMON_INTERCEPT_FUNCTION(strsvis); \
COMMON_INTERCEPT_FUNCTION(strsnvis); \
COMMON_INTERCEPT_FUNCTION(strsvisx); \
COMMON_INTERCEPT_FUNCTION(strsnvisx); \
COMMON_INTERCEPT_FUNCTION(strsenvisx); \
COMMON_INTERCEPT_FUNCTION(unvis); \
COMMON_INTERCEPT_FUNCTION(strunvis); \
COMMON_INTERCEPT_FUNCTION(strnunvis); \
COMMON_INTERCEPT_FUNCTION(strunvisx); \
COMMON_INTERCEPT_FUNCTION(strnunvisx)
#else
#define INIT_VIS
#endif
#if SANITIZER_INTERCEPT_CDB
INTERCEPTOR(struct __sanitizer_cdbr *, cdbr_open, const char *path, int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_open, path, flags);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
struct __sanitizer_cdbr *cdbr = REAL(cdbr_open)(path, flags);
if (cdbr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbr, sizeof(*cdbr));
return cdbr;
}
INTERCEPTOR(struct __sanitizer_cdbr *, cdbr_open_mem, void *base, SIZE_T size,
int flags, void (*unmap)(void *, void *, SIZE_T), void *cookie) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_open_mem, base, size, flags, unmap,
cookie);
if (base && size)
COMMON_INTERCEPTOR_READ_RANGE(ctx, base, size);
struct __sanitizer_cdbr *cdbr =
REAL(cdbr_open_mem)(base, size, flags, unmap, cookie);
if (cdbr)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbr, sizeof(*cdbr));
return cdbr;
}
INTERCEPTOR(u32, cdbr_entries, struct __sanitizer_cdbr *cdbr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_entries, cdbr);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
return REAL(cdbr_entries)(cdbr);
}
INTERCEPTOR(int, cdbr_get, struct __sanitizer_cdbr *cdbr, u32 index,
const void **data, SIZE_T *datalen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_get, cdbr, index, data, datalen);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
int ret = REAL(cdbr_get)(cdbr, index, data, datalen);
if (!ret) {
if (data)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(*data));
if (datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datalen, sizeof(*datalen));
if (data && datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *data, *datalen);
}
return ret;
}
INTERCEPTOR(int, cdbr_find, struct __sanitizer_cdbr *cdbr, const void *key,
SIZE_T keylen, const void **data, SIZE_T *datalen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_find, cdbr, key, keylen, data, datalen);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
if (key)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, keylen);
int ret = REAL(cdbr_find)(cdbr, key, keylen, data, datalen);
if (!ret) {
if (data)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data, sizeof(*data));
if (datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, datalen, sizeof(*datalen));
if (data && datalen)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, *data, *datalen);
}
return ret;
}
INTERCEPTOR(void, cdbr_close, struct __sanitizer_cdbr *cdbr) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbr_close, cdbr);
if (cdbr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbr, sizeof(*cdbr));
REAL(cdbr_close)(cdbr);
}
INTERCEPTOR(struct __sanitizer_cdbw *, cdbw_open) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_open);
struct __sanitizer_cdbw *ret = REAL(cdbw_open)();
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, sizeof(*ret));
return ret;
}
INTERCEPTOR(int, cdbw_put, struct __sanitizer_cdbw *cdbw, const void *key,
SIZE_T keylen, const void *data, SIZE_T datalen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_put, cdbw, key, keylen, data, datalen);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (data && datalen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, datalen);
if (key && keylen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, keylen);
int ret = REAL(cdbw_put)(cdbw, key, keylen, data, datalen);
if (!ret && cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
return ret;
}
INTERCEPTOR(int, cdbw_put_data, struct __sanitizer_cdbw *cdbw, const void *data,
SIZE_T datalen, u32 *index) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_put_data, cdbw, data, datalen, index);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (data && datalen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, data, datalen);
int ret = REAL(cdbw_put_data)(cdbw, data, datalen, index);
if (!ret) {
if (index)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, index, sizeof(*index));
if (cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
}
return ret;
}
INTERCEPTOR(int, cdbw_put_key, struct __sanitizer_cdbw *cdbw, const void *key,
SIZE_T keylen, u32 index) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_put_key, cdbw, key, keylen, index);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (key && keylen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, keylen);
int ret = REAL(cdbw_put_key)(cdbw, key, keylen, index);
if (!ret && cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
return ret;
}
INTERCEPTOR(int, cdbw_output, struct __sanitizer_cdbw *cdbw, int output,
const char descr[16], u32 (*seedgen)(void)) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_output, cdbw, output, descr, seedgen);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, output);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
if (descr)
COMMON_INTERCEPTOR_READ_RANGE(ctx, descr, internal_strnlen(descr, 16));
if (seedgen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, (void *)seedgen, sizeof(seedgen));
int ret = REAL(cdbw_output)(cdbw, output, descr, seedgen);
if (!ret) {
if (cdbw)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, cdbw, sizeof(*cdbw));
if (output >= 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, output);
}
return ret;
}
INTERCEPTOR(void, cdbw_close, struct __sanitizer_cdbw *cdbw) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, cdbw_close, cdbw);
if (cdbw)
COMMON_INTERCEPTOR_READ_RANGE(ctx, cdbw, sizeof(*cdbw));
REAL(cdbw_close)(cdbw);
}
#define INIT_CDB \
COMMON_INTERCEPT_FUNCTION(cdbr_open); \
COMMON_INTERCEPT_FUNCTION(cdbr_open_mem); \
COMMON_INTERCEPT_FUNCTION(cdbr_entries); \
COMMON_INTERCEPT_FUNCTION(cdbr_get); \
COMMON_INTERCEPT_FUNCTION(cdbr_find); \
COMMON_INTERCEPT_FUNCTION(cdbr_close); \
COMMON_INTERCEPT_FUNCTION(cdbw_open); \
COMMON_INTERCEPT_FUNCTION(cdbw_put); \
COMMON_INTERCEPT_FUNCTION(cdbw_put_data); \
COMMON_INTERCEPT_FUNCTION(cdbw_put_key); \
COMMON_INTERCEPT_FUNCTION(cdbw_output); \
COMMON_INTERCEPT_FUNCTION(cdbw_close)
#else
#define INIT_CDB
#endif
#if SANITIZER_INTERCEPT_GETFSENT
INTERCEPTOR(void *, getfsent) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsent);
void *ret = REAL(getfsent)();
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
return ret;
}
INTERCEPTOR(void *, getfsspec, const char *spec) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsspec, spec);
if (spec)
COMMON_INTERCEPTOR_READ_RANGE(ctx, spec, internal_strlen(spec) + 1);
void *ret = REAL(getfsspec)(spec);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
return ret;
}
INTERCEPTOR(void *, getfsfile, const char *file) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getfsfile, file);
if (file)
COMMON_INTERCEPTOR_READ_RANGE(ctx, file, internal_strlen(file) + 1);
void *ret = REAL(getfsfile)(file);
if (ret)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ret, struct_fstab_sz);
return ret;
}
#define INIT_GETFSENT \
COMMON_INTERCEPT_FUNCTION(getfsent); \
COMMON_INTERCEPT_FUNCTION(getfsspec); \
COMMON_INTERCEPT_FUNCTION(getfsfile);
#else
#define INIT_GETFSENT
#endif
#if SANITIZER_INTERCEPT_ARC4RANDOM
INTERCEPTOR(void, arc4random_buf, void *buf, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, arc4random_buf, buf, len);
REAL(arc4random_buf)(buf, len);
if (buf && len)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, len);
}
INTERCEPTOR(void, arc4random_addrandom, u8 *dat, int datlen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, arc4random_addrandom, dat, datlen);
if (dat && datlen)
COMMON_INTERCEPTOR_READ_RANGE(ctx, dat, datlen);
REAL(arc4random_addrandom)(dat, datlen);
}
#define INIT_ARC4RANDOM \
COMMON_INTERCEPT_FUNCTION(arc4random_buf); \
COMMON_INTERCEPT_FUNCTION(arc4random_addrandom);
#else
#define INIT_ARC4RANDOM
#endif
#if SANITIZER_INTERCEPT_POPEN
INTERCEPTOR(__sanitizer_FILE *, popen, const char *command, const char *type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, popen, command, type);
if (command)
COMMON_INTERCEPTOR_READ_RANGE(ctx, command, internal_strlen(command) + 1);
if (type)
COMMON_INTERCEPTOR_READ_RANGE(ctx, type, internal_strlen(type) + 1);
__sanitizer_FILE *res = REAL(popen)(command, type);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, nullptr);
if (res) unpoison_file(res);
return res;
}
#define INIT_POPEN COMMON_INTERCEPT_FUNCTION(popen)
#else
#define INIT_POPEN
#endif
#if SANITIZER_INTERCEPT_POPENVE
INTERCEPTOR(__sanitizer_FILE *, popenve, const char *path,
char *const *argv, char *const *envp, const char *type) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, popenve, path, argv, envp, type);
if (path)
COMMON_INTERCEPTOR_READ_RANGE(ctx, path, internal_strlen(path) + 1);
if (argv) {
for (char *const *pa = argv; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
if (envp) {
for (char *const *pa = envp; ; ++pa) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, pa, sizeof(char **));
if (!*pa)
break;
COMMON_INTERCEPTOR_READ_RANGE(ctx, *pa, internal_strlen(*pa) + 1);
}
}
if (type)
COMMON_INTERCEPTOR_READ_RANGE(ctx, type, internal_strlen(type) + 1);
__sanitizer_FILE *res = REAL(popenve)(path, argv, envp, type);
COMMON_INTERCEPTOR_FILE_OPEN(ctx, res, nullptr);
if (res) unpoison_file(res);
return res;
}
#define INIT_POPENVE COMMON_INTERCEPT_FUNCTION(popenve)
#else
#define INIT_POPENVE
#endif
#if SANITIZER_INTERCEPT_PCLOSE
INTERCEPTOR(int, pclose, __sanitizer_FILE *fp) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, pclose, fp);
COMMON_INTERCEPTOR_FILE_CLOSE(ctx, fp);
const FileMetadata *m = GetInterceptorMetadata(fp);
int res = REAL(pclose)(fp);
if (m) {
COMMON_INTERCEPTOR_INITIALIZE_RANGE(*m->addr, *m->size);
DeleteInterceptorMetadata(fp);
}
return res;
}
#define INIT_PCLOSE COMMON_INTERCEPT_FUNCTION(pclose);
#else
#define INIT_PCLOSE
#endif
#if SANITIZER_INTERCEPT_FUNOPEN
typedef int (*funopen_readfn)(void *cookie, char *buf, int len);
typedef int (*funopen_writefn)(void *cookie, const char *buf, int len);
typedef OFF_T (*funopen_seekfn)(void *cookie, OFF_T offset, int whence);
typedef int (*funopen_closefn)(void *cookie);
struct WrappedFunopenCookie {
void *real_cookie;
funopen_readfn real_read;
funopen_writefn real_write;
funopen_seekfn real_seek;
funopen_closefn real_close;
};
static int wrapped_funopen_read(void *cookie, char *buf, int len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_readfn real_read = wrapped_cookie->real_read;
return real_read(wrapped_cookie->real_cookie, buf, len);
}
static int wrapped_funopen_write(void *cookie, const char *buf, int len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_writefn real_write = wrapped_cookie->real_write;
return real_write(wrapped_cookie->real_cookie, buf, len);
}
static OFF_T wrapped_funopen_seek(void *cookie, OFF_T offset, int whence) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_seekfn real_seek = wrapped_cookie->real_seek;
return real_seek(wrapped_cookie->real_cookie, offset, whence);
}
static int wrapped_funopen_close(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedFunopenCookie *wrapped_cookie = (WrappedFunopenCookie *)cookie;
funopen_closefn real_close = wrapped_cookie->real_close;
int res = real_close(wrapped_cookie->real_cookie);
InternalFree(wrapped_cookie);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, funopen, void *cookie, funopen_readfn readfn,
funopen_writefn writefn, funopen_seekfn seekfn,
funopen_closefn closefn) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, funopen, cookie, readfn, writefn, seekfn,
closefn);
WrappedFunopenCookie *wrapped_cookie =
(WrappedFunopenCookie *)InternalAlloc(sizeof(WrappedFunopenCookie));
wrapped_cookie->real_cookie = cookie;
wrapped_cookie->real_read = readfn;
wrapped_cookie->real_write = writefn;
wrapped_cookie->real_seek = seekfn;
wrapped_cookie->real_close = closefn;
__sanitizer_FILE *res =
REAL(funopen)(wrapped_cookie,
readfn ? wrapped_funopen_read : nullptr,
writefn ? wrapped_funopen_write : nullptr,
seekfn ? wrapped_funopen_seek : nullptr,
closefn ? wrapped_funopen_close : nullptr);
if (res)
unpoison_file(res);
return res;
}
#define INIT_FUNOPEN COMMON_INTERCEPT_FUNCTION(funopen)
#else
#define INIT_FUNOPEN
#endif
#if SANITIZER_INTERCEPT_FUNOPEN2
typedef SSIZE_T (*funopen2_readfn)(void *cookie, void *buf, SIZE_T len);
typedef SSIZE_T (*funopen2_writefn)(void *cookie, const void *buf, SIZE_T len);
typedef OFF_T (*funopen2_seekfn)(void *cookie, OFF_T offset, int whence);
typedef int (*funopen2_flushfn)(void *cookie);
typedef int (*funopen2_closefn)(void *cookie);
struct WrappedFunopen2Cookie {
void *real_cookie;
funopen2_readfn real_read;
funopen2_writefn real_write;
funopen2_seekfn real_seek;
funopen2_flushfn real_flush;
funopen2_closefn real_close;
};
static SSIZE_T wrapped_funopen2_read(void *cookie, void *buf, SIZE_T len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_readfn real_read = wrapped_cookie->real_read;
return real_read(wrapped_cookie->real_cookie, buf, len);
}
static SSIZE_T wrapped_funopen2_write(void *cookie, const void *buf,
SIZE_T len) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_writefn real_write = wrapped_cookie->real_write;
return real_write(wrapped_cookie->real_cookie, buf, len);
}
static OFF_T wrapped_funopen2_seek(void *cookie, OFF_T offset, int whence) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_seekfn real_seek = wrapped_cookie->real_seek;
return real_seek(wrapped_cookie->real_cookie, offset, whence);
}
static int wrapped_funopen2_flush(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_flushfn real_flush = wrapped_cookie->real_flush;
return real_flush(wrapped_cookie->real_cookie);
}
static int wrapped_funopen2_close(void *cookie) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(1);
WrappedFunopen2Cookie *wrapped_cookie = (WrappedFunopen2Cookie *)cookie;
funopen2_closefn real_close = wrapped_cookie->real_close;
int res = real_close(wrapped_cookie->real_cookie);
InternalFree(wrapped_cookie);
return res;
}
INTERCEPTOR(__sanitizer_FILE *, funopen2, void *cookie, funopen2_readfn readfn,
funopen2_writefn writefn, funopen2_seekfn seekfn,
funopen2_flushfn flushfn, funopen2_closefn closefn) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, funopen2, cookie, readfn, writefn, seekfn,
flushfn, closefn);
WrappedFunopen2Cookie *wrapped_cookie =
(WrappedFunopen2Cookie *)InternalAlloc(sizeof(WrappedFunopen2Cookie));
wrapped_cookie->real_cookie = cookie;
wrapped_cookie->real_read = readfn;
wrapped_cookie->real_write = writefn;
wrapped_cookie->real_seek = seekfn;
wrapped_cookie->real_flush = flushfn;
wrapped_cookie->real_close = closefn;
__sanitizer_FILE *res =
REAL(funopen2)(wrapped_cookie,
readfn ? wrapped_funopen2_read : nullptr,
writefn ? wrapped_funopen2_write : nullptr,
seekfn ? wrapped_funopen2_seek : nullptr,
flushfn ? wrapped_funopen2_flush : nullptr,
closefn ? wrapped_funopen2_close : nullptr);
if (res)
unpoison_file(res);
return res;
}
#define INIT_FUNOPEN2 COMMON_INTERCEPT_FUNCTION(funopen2)
#else
#define INIT_FUNOPEN2
#endif
#if SANITIZER_INTERCEPT_FDEVNAME
INTERCEPTOR(char *, fdevname, int fd) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdevname, fd);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
char *name = REAL(fdevname)(fd);
if (name) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, name, internal_strlen(name) + 1);
if (fd > 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return name;
}
INTERCEPTOR(char *, fdevname_r, int fd, char *buf, SIZE_T len) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, fdevname_r, fd, buf, len);
COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd);
char *name = REAL(fdevname_r)(fd, buf, len);
if (name && buf && len > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1);
if (fd > 0)
COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd);
}
return name;
}
#define INIT_FDEVNAME \
COMMON_INTERCEPT_FUNCTION(fdevname); \
COMMON_INTERCEPT_FUNCTION(fdevname_r);
#else
#define INIT_FDEVNAME
#endif
#if SANITIZER_INTERCEPT_GETUSERSHELL
INTERCEPTOR(char *, getusershell) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getusershell);
char *res = REAL(getusershell)();
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
#define INIT_GETUSERSHELL COMMON_INTERCEPT_FUNCTION(getusershell);
#else
#define INIT_GETUSERSHELL
#endif
#if SANITIZER_INTERCEPT_SL_INIT
INTERCEPTOR(void *, sl_init) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_init);
void *res = REAL(sl_init)();
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, __sanitizer::struct_StringList_sz);
return res;
}
INTERCEPTOR(int, sl_add, void *sl, char *item) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_add, sl, item);
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
if (item)
COMMON_INTERCEPTOR_READ_RANGE(ctx, item, internal_strlen(item) + 1);
int res = REAL(sl_add)(sl, item);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
return res;
}
INTERCEPTOR(char *, sl_find, void *sl, const char *item) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_find, sl, item);
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
if (item)
COMMON_INTERCEPTOR_READ_RANGE(ctx, item, internal_strlen(item) + 1);
char *res = REAL(sl_find)(sl, item);
if (res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, res, internal_strlen(res) + 1);
return res;
}
INTERCEPTOR(void, sl_free, void *sl, int freeall) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sl_free, sl, freeall);
if (sl)
COMMON_INTERCEPTOR_READ_RANGE(ctx, sl, __sanitizer::struct_StringList_sz);
REAL(sl_free)(sl, freeall);
}
#define INIT_SL_INIT \
COMMON_INTERCEPT_FUNCTION(sl_init); \
COMMON_INTERCEPT_FUNCTION(sl_add); \
COMMON_INTERCEPT_FUNCTION(sl_find); \
COMMON_INTERCEPT_FUNCTION(sl_free);
#else
#define INIT_SL_INIT
#endif
#if SANITIZER_INTERCEPT_GETRANDOM
INTERCEPTOR(SSIZE_T, getrandom, void *buf, SIZE_T buflen, unsigned int flags) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getrandom, buf, buflen, flags);
SSIZE_T n = REAL(getrandom)(buf, buflen, flags);
if (n > 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, n);
}
return n;
}
#define INIT_GETRANDOM COMMON_INTERCEPT_FUNCTION(getrandom)
#else
#define INIT_GETRANDOM
#endif
#if SANITIZER_INTERCEPT_CRYPT
INTERCEPTOR(char *, crypt, char *key, char *salt) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, crypt, key, salt);
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, internal_strlen(key) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, salt, internal_strlen(salt) + 1);
char *res = REAL(crypt)(key, salt);
if (res != nullptr)
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
return res;
}
#define INIT_CRYPT COMMON_INTERCEPT_FUNCTION(crypt);
#else
#define INIT_CRYPT
#endif
#if SANITIZER_INTERCEPT_CRYPT_R
INTERCEPTOR(char *, crypt_r, char *key, char *salt, void *data) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, crypt_r, key, salt, data);
COMMON_INTERCEPTOR_READ_RANGE(ctx, key, internal_strlen(key) + 1);
COMMON_INTERCEPTOR_READ_RANGE(ctx, salt, internal_strlen(salt) + 1);
char *res = REAL(crypt_r)(key, salt, data);
if (res != nullptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, data,
__sanitizer::struct_crypt_data_sz);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(res, internal_strlen(res) + 1);
}
return res;
}
#define INIT_CRYPT_R COMMON_INTERCEPT_FUNCTION(crypt_r);
#else
#define INIT_CRYPT_R
#endif
#if SANITIZER_INTERCEPT_GETENTROPY
INTERCEPTOR(int, getentropy, void *buf, SIZE_T buflen) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, getentropy, buf, buflen);
int r = REAL(getentropy)(buf, buflen);
if (r == 0) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, buflen);
}
return r;
}
#define INIT_GETENTROPY COMMON_INTERCEPT_FUNCTION(getentropy)
#else
#define INIT_GETENTROPY
#endif
#if SANITIZER_INTERCEPT_QSORT_R
typedef int (*qsort_r_compar_f)(const void *, const void *, void *);
struct qsort_r_compar_params {
SIZE_T size;
qsort_r_compar_f compar;
void *arg;
};
static int wrapped_qsort_r_compar(const void *a, const void *b, void *arg) {
qsort_r_compar_params *params = (qsort_r_compar_params *)arg;
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, params->size);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, params->size);
return params->compar(a, b, params->arg);
}
INTERCEPTOR(void, qsort_r, void *base, SIZE_T nmemb, SIZE_T size,
qsort_r_compar_f compar, void *arg) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, qsort_r, base, nmemb, size, compar, arg);
// Run the comparator over all array elements to detect any memory issues.
if (nmemb > 1) {
for (SIZE_T i = 0; i < nmemb - 1; ++i) {
void *p = (void *)((char *)base + i * size);
void *q = (void *)((char *)base + (i + 1) * size);
COMMON_INTERCEPTOR_UNPOISON_PARAM(3);
compar(p, q, arg);
}
}
qsort_r_compar_params params = {size, compar, arg};
REAL(qsort_r)(base, nmemb, size, wrapped_qsort_r_compar, &params);
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
}
# define INIT_QSORT_R COMMON_INTERCEPT_FUNCTION(qsort_r)
#else
# define INIT_QSORT_R
#endif
#if SANITIZER_INTERCEPT_QSORT && SANITIZER_INTERCEPT_QSORT_R
INTERCEPTOR(void, qsort, void *base, SIZE_T nmemb, SIZE_T size,
qsort_r_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, qsort, base, nmemb, size, compar);
WRAP(qsort_r)(base, nmemb, size, compar, nullptr);
}
# define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
#elif SANITIZER_INTERCEPT_QSORT && !SANITIZER_INTERCEPT_QSORT_R
// Glibc qsort uses a temporary buffer allocated either on stack or on heap.
// Poisoned memory from there may get copied into the comparator arguments,
// where it needs to be dealt with. But even that is not enough - the results of
// the sort may be copied into the input/output array based on the results of
// the comparator calls, but directly from the temp memory, bypassing the
// unpoisoning done in wrapped_qsort_compar. We deal with this by, again,
// unpoisoning the entire array after the sort is done.
//
// We cannot check that the entire array is initialized at the beginning: it is
// fine for parts of the sorted objects to contain uninitialized memory, e.g.
// padding in structs.
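//
// A minimal sketch (illustrative only, not part of the interceptor) of the
// failure mode described above, assuming an MSan-instrumented build and a
// hypothetical struct with uninitialized padding:
//
//   struct Padded { int key; char pad[4]; };        // pad stays uninitialized
//   Padded a[8];
//   for (int i = 0; i < 8; ++i) a[i].key = 8 - i;   // only key is written
//   qsort(a, 8, sizeof(Padded), [](const void *l, const void *r) {
//     return ((const Padded *)l)->key - ((const Padded *)r)->key;
//   });
//   // glibc may copy whole elements through a temporary buffer, so without the
//   // blanket COMMON_INTERCEPTOR_WRITE_RANGE after REAL(qsort), the poisoned
//   // padding copied back into `a` could trigger false positives later.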
typedef int (*qsort_compar_f)(const void *, const void *);
static THREADLOCAL qsort_compar_f qsort_compar;
static THREADLOCAL SIZE_T qsort_size;
static int wrapped_qsort_compar(const void *a, const void *b) {
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(a, qsort_size);
COMMON_INTERCEPTOR_INITIALIZE_RANGE(b, qsort_size);
return qsort_compar(a, b);
}
INTERCEPTOR(void, qsort, void *base, SIZE_T nmemb, SIZE_T size,
qsort_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, qsort, base, nmemb, size, compar);
// Run the comparator over all array elements to detect any memory issues.
if (nmemb > 1) {
for (SIZE_T i = 0; i < nmemb - 1; ++i) {
void *p = (void *)((char *)base + i * size);
void *q = (void *)((char *)base + (i + 1) * size);
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
compar(p, q);
}
}
qsort_compar_f old_compar = qsort_compar;
SIZE_T old_size = qsort_size;
// Handle qsort() implementations that recurse using an
// interposable function call:
bool already_wrapped = compar == wrapped_qsort_compar;
if (already_wrapped) {
// This case should only happen if the qsort() implementation calls itself
// using a preemptible function call (e.g. the FreeBSD libc version).
// Check that the size and comparator arguments are as expected.
CHECK_NE(compar, qsort_compar);
CHECK_EQ(qsort_size, size);
} else {
qsort_compar = compar;
qsort_size = size;
}
REAL(qsort)(base, nmemb, size, wrapped_qsort_compar);
if (!already_wrapped) {
qsort_compar = old_compar;
qsort_size = old_size;
}
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, base, nmemb * size);
}
# define INIT_QSORT COMMON_INTERCEPT_FUNCTION(qsort)
#else
# define INIT_QSORT
#endif
#if SANITIZER_INTERCEPT_BSEARCH
typedef int (*bsearch_compar_f)(const void *, const void *);
struct bsearch_compar_params {
const void *key;
bsearch_compar_f compar;
};
static int wrapped_bsearch_compar(const void *key, const void *b) {
const bsearch_compar_params *params = (const bsearch_compar_params *)key;
COMMON_INTERCEPTOR_UNPOISON_PARAM(2);
return params->compar(params->key, b);
}
INTERCEPTOR(void *, bsearch, const void *key, const void *base, SIZE_T nmemb,
SIZE_T size, bsearch_compar_f compar) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, bsearch, key, base, nmemb, size, compar);
bsearch_compar_params params = {key, compar};
return REAL(bsearch)(&params, base, nmemb, size, wrapped_bsearch_compar);
}
# define INIT_BSEARCH COMMON_INTERCEPT_FUNCTION(bsearch)
#else
# define INIT_BSEARCH
#endif
#if SANITIZER_INTERCEPT_SIGALTSTACK
INTERCEPTOR(int, sigaltstack, void *ss, void *oss) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, sigaltstack, ss, oss);
int r = REAL(sigaltstack)(ss, oss);
if (r == 0 && oss != nullptr) {
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, oss, struct_stack_t_sz);
}
return r;
}
#define INIT_SIGALTSTACK COMMON_INTERCEPT_FUNCTION(sigaltstack)
#else
#define INIT_SIGALTSTACK
#endif
#if SANITIZER_INTERCEPT_PROCCTL
INTERCEPTOR(int, procctl, int idtype, u64 id, int cmd, uptr data) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, procctl, idtype, id, cmd, data);
static const int PROC_REAP_ACQUIRE = 2;
static const int PROC_REAP_RELEASE = 3;
static const int PROC_REAP_STATUS = 4;
static const int PROC_REAP_GETPIDS = 5;
static const int PROC_REAP_KILL = 6;
if (cmd < PROC_REAP_ACQUIRE || cmd > PROC_REAP_KILL) {
COMMON_INTERCEPTOR_READ_RANGE(ctx, (void *)data, sizeof(int));
} else {
// reap_acquire/reap_release take no arguments.
if (cmd > PROC_REAP_RELEASE) {
unsigned int reapsz;
switch (cmd) {
case PROC_REAP_STATUS:
reapsz = struct_procctl_reaper_status_sz;
break;
case PROC_REAP_GETPIDS:
reapsz = struct_procctl_reaper_pids_sz;
break;
case PROC_REAP_KILL:
reapsz = struct_procctl_reaper_kill_sz;
break;
}
COMMON_INTERCEPTOR_READ_RANGE(ctx, (void *)data, reapsz);
}
}
return REAL(procctl)(idtype, id, cmd, data);
}
#define INIT_PROCCTL COMMON_INTERCEPT_FUNCTION(procctl)
#else
#define INIT_PROCCTL
#endif
#if SANITIZER_INTERCEPT_UNAME
INTERCEPTOR(int, uname, struct utsname *utsname) {
#if SANITIZER_LINUX
if (COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED)
return internal_uname(utsname);
#endif
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, uname, utsname);
int res = REAL(uname)(utsname);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, utsname,
__sanitizer::struct_utsname_sz);
return res;
}
#define INIT_UNAME COMMON_INTERCEPT_FUNCTION(uname)
#else
#define INIT_UNAME
#endif
#if SANITIZER_INTERCEPT___XUNAME
// FreeBSD's <sys/utsname.h> defines uname() as
// static __inline int uname(struct utsname *name) {
// return __xuname(SYS_NMLN, (void*)name);
// }
INTERCEPTOR(int, __xuname, int size, void *utsname) {
void *ctx;
COMMON_INTERCEPTOR_ENTER(ctx, __xuname, size, utsname);
int res = REAL(__xuname)(size, utsname);
if (!res)
COMMON_INTERCEPTOR_WRITE_RANGE(ctx, utsname,
__sanitizer::struct_utsname_sz);
return res;
}
#define INIT___XUNAME COMMON_INTERCEPT_FUNCTION(__xuname)
#else
#define INIT___XUNAME
#endif
#include "sanitizer_common_interceptors_netbsd_compat.inc"
static void InitializeCommonInterceptors() {
#if SI_POSIX
static u64 metadata_mem[sizeof(MetadataHashMap) / sizeof(u64) + 1];
interceptor_metadata_map = new ((void *)&metadata_mem) MetadataHashMap();
#endif
INIT_MMAP;
INIT_MMAP64;
INIT_TEXTDOMAIN;
INIT_STRLEN;
INIT_STRNLEN;
INIT_STRNDUP;
INIT___STRNDUP;
INIT_STRCMP;
INIT_STRNCMP;
INIT_STRCASECMP;
INIT_STRNCASECMP;
INIT_STRSTR;
INIT_STRCASESTR;
INIT_STRCHR;
INIT_STRCHRNUL;
INIT_STRRCHR;
INIT_STRSPN;
INIT_STRTOK;
INIT_STRPBRK;
INIT_STRXFRM;
INIT___STRXFRM_L;
INIT_MEMSET;
INIT_MEMMOVE;
INIT_MEMCPY;
INIT_MEMCHR;
INIT_MEMCMP;
INIT_BCMP;
INIT_MEMRCHR;
INIT_MEMMEM;
INIT_READ;
INIT_FREAD;
INIT_PREAD;
INIT_PREAD64;
INIT_READV;
INIT_PREADV;
INIT_PREADV64;
INIT_WRITE;
INIT_FWRITE;
INIT_PWRITE;
INIT_PWRITE64;
INIT_WRITEV;
INIT_PWRITEV;
INIT_PWRITEV64;
INIT_FGETS;
INIT_FPUTS;
INIT_PUTS;
INIT_PRCTL;
INIT_LOCALTIME_AND_FRIENDS;
INIT_STRPTIME;
INIT_SCANF;
INIT_ISOC99_SCANF;
INIT_PRINTF;
INIT_PRINTF_L;
INIT_ISOC99_PRINTF;
INIT_FREXP;
INIT_FREXPF_FREXPL;
INIT_GETPWNAM_AND_FRIENDS;
INIT_GETPWNAM_R_AND_FRIENDS;
INIT_GETPWENT;
INIT_FGETPWENT;
INIT_GETPWENT_R;
INIT_FGETPWENT_R;
INIT_FGETGRENT_R;
INIT_SETPWENT;
INIT_CLOCK_GETTIME;
INIT_CLOCK_GETCPUCLOCKID;
INIT_GETITIMER;
INIT_TIME;
INIT_GLOB;
INIT_GLOB64;
INIT___B64_TO;
INIT___DN_EXPAND;
INIT_POSIX_SPAWN;
INIT_WAIT;
INIT_WAIT4;
INIT_INET;
INIT_PTHREAD_GETSCHEDPARAM;
INIT_GETADDRINFO;
INIT_GETNAMEINFO;
INIT_GETSOCKNAME;
INIT_GETHOSTBYNAME;
INIT_GETHOSTBYNAME2;
INIT_GETHOSTBYNAME_R;
INIT_GETHOSTBYNAME2_R;
INIT_GETHOSTBYADDR_R;
INIT_GETHOSTENT_R;
INIT_GETSOCKOPT;
INIT_ACCEPT;
INIT_ACCEPT4;
INIT_PACCEPT;
INIT_MODF;
INIT_RECVMSG;
INIT_SENDMSG;
INIT_RECVMMSG;
INIT_SENDMMSG;
INIT_SYSMSG;
INIT_GETPEERNAME;
INIT_IOCTL;
INIT_INET_ATON;
INIT_SYSINFO;
INIT_READDIR;
INIT_READDIR64;
INIT_PTRACE;
INIT_SETLOCALE;
INIT_GETCWD;
INIT_GET_CURRENT_DIR_NAME;
INIT_STRTOIMAX;
INIT_MBSTOWCS;
INIT_MBSNRTOWCS;
INIT_WCSTOMBS;
INIT_WCSNRTOMBS;
INIT_WCRTOMB;
INIT_WCTOMB;
INIT_TCGETATTR;
INIT_REALPATH;
INIT_CANONICALIZE_FILE_NAME;
INIT_CONFSTR;
INIT_SCHED_GETAFFINITY;
INIT_SCHED_GETPARAM;
INIT_STRERROR;
INIT_STRERROR_R;
INIT_XPG_STRERROR_R;
INIT_SCANDIR;
INIT_SCANDIR64;
INIT_GETGROUPS;
INIT_POLL;
INIT_PPOLL;
INIT_WORDEXP;
INIT_SIGWAIT;
INIT_SIGWAITINFO;
INIT_SIGTIMEDWAIT;
INIT_SIGSETOPS;
INIT_SIGSET_LOGICOPS;
INIT_SIGPENDING;
INIT_SIGPROCMASK;
INIT_PTHREAD_SIGMASK;
INIT_BACKTRACE;
INIT__EXIT;
INIT_PTHREAD_MUTEX_LOCK;
INIT_PTHREAD_MUTEX_UNLOCK;
INIT___PTHREAD_MUTEX_LOCK;
INIT___PTHREAD_MUTEX_UNLOCK;
INIT___LIBC_MUTEX_LOCK;
INIT___LIBC_MUTEX_UNLOCK;
INIT___LIBC_THR_SETCANCELSTATE;
INIT_GETMNTENT;
INIT_GETMNTENT_R;
INIT_STATFS;
INIT_STATFS64;
INIT_STATVFS;
INIT_STATVFS64;
INIT_INITGROUPS;
INIT_ETHER_NTOA_ATON;
INIT_ETHER_HOST;
INIT_ETHER_R;
INIT_SHMCTL;
INIT_RANDOM_R;
INIT_PTHREAD_ATTR_GET;
INIT_PTHREAD_ATTR_GET_SCHED;
INIT_PTHREAD_ATTR_GETINHERITSCHED;
INIT_PTHREAD_ATTR_GETAFFINITY_NP;
INIT_PTHREAD_GETAFFINITY_NP;
INIT_PTHREAD_MUTEXATTR_GETPSHARED;
INIT_PTHREAD_MUTEXATTR_GETTYPE;
INIT_PTHREAD_MUTEXATTR_GETPROTOCOL;
INIT_PTHREAD_MUTEXATTR_GETPRIOCEILING;
INIT_PTHREAD_MUTEXATTR_GETROBUST;
INIT_PTHREAD_MUTEXATTR_GETROBUST_NP;
INIT_PTHREAD_RWLOCKATTR_GETPSHARED;
INIT_PTHREAD_RWLOCKATTR_GETKIND_NP;
INIT_PTHREAD_CONDATTR_GETPSHARED;
INIT_PTHREAD_CONDATTR_GETCLOCK;
INIT_PTHREAD_BARRIERATTR_GETPSHARED;
INIT_TMPNAM;
INIT_TMPNAM_R;
INIT_PTSNAME;
INIT_PTSNAME_R;
INIT_TTYNAME;
INIT_TTYNAME_R;
INIT_TEMPNAM;
INIT_PTHREAD_SETNAME_NP;
INIT_PTHREAD_GETNAME_NP;
INIT_SINCOS;
INIT_REMQUO;
INIT_REMQUOL;
INIT_LGAMMA;
INIT_LGAMMAL;
INIT_LGAMMA_R;
INIT_LGAMMAL_R;
INIT_DRAND48_R;
INIT_RAND_R;
INIT_GETLINE;
INIT_ICONV;
INIT_TIMES;
INIT_TLS_GET_ADDR;
INIT_LISTXATTR;
INIT_GETXATTR;
INIT_GETRESID;
INIT_GETIFADDRS;
INIT_IF_INDEXTONAME;
INIT_CAPGET;
INIT_AEABI_MEM;
INIT___BZERO;
INIT_BZERO;
INIT_FTIME;
INIT_XDR;
INIT_XDRREC_LINUX;
INIT_TSEARCH;
INIT_LIBIO_INTERNALS;
INIT_FOPEN;
INIT_FOPEN64;
INIT_FLOPEN;
INIT_OPEN_MEMSTREAM;
INIT_OBSTACK;
INIT_FFLUSH;
INIT_FCLOSE;
INIT_DLOPEN_DLCLOSE;
INIT_GETPASS;
INIT_TIMERFD;
INIT_MLOCKX;
INIT_FOPENCOOKIE;
INIT_SEM;
INIT_PTHREAD_SETCANCEL;
INIT_MINCORE;
INIT_PROCESS_VM_READV;
INIT_CTERMID;
INIT_CTERMID_R;
INIT_RECV_RECVFROM;
INIT_SEND_SENDTO;
INIT_STAT;
INIT_STAT64;
INIT_EVENTFD_READ_WRITE;
INIT_LSTAT;
INIT_LSTAT64;
INIT___XSTAT;
INIT___XSTAT64;
INIT___LXSTAT;
INIT___LXSTAT64;
// FIXME: add other *stat interceptors.
INIT_UTMP;
INIT_UTMPX;
INIT_GETLOADAVG;
INIT_WCSLEN;
INIT_WCSCAT;
INIT_WCSDUP;
INIT_WCSXFRM;
INIT___WCSXFRM_L;
INIT_ACCT;
INIT_USER_FROM_UID;
INIT_UID_FROM_USER;
INIT_GROUP_FROM_GID;
INIT_GID_FROM_GROUP;
INIT_ACCESS;
INIT_FACCESSAT;
INIT_GETGROUPLIST;
INIT_GETGROUPMEMBERSHIP;
INIT_READLINK;
INIT_READLINKAT;
INIT_NAME_TO_HANDLE_AT;
INIT_OPEN_BY_HANDLE_AT;
INIT_STRLCPY;
INIT_DEVNAME;
INIT_DEVNAME_R;
INIT_FGETLN;
INIT_STRMODE;
INIT_TTYENT;
INIT_PROTOENT;
INIT_PROTOENT_R;
INIT_NETENT;
INIT_GETMNTINFO;
INIT_MI_VECTOR_HASH;
INIT_SETVBUF;
INIT_GETVFSSTAT;
INIT_REGEX;
INIT_REGEXSUB;
INIT_FTS;
INIT_SYSCTL;
INIT_ASYSCTL;
INIT_SYSCTLGETMIBINFO;
INIT_NL_LANGINFO;
INIT_MODCTL;
INIT_STRTONUM;
INIT_FPARSELN;
INIT_STATVFS1;
INIT_STRTOI;
INIT_CAPSICUM;
INIT_SHA1;
INIT_MD4;
INIT_RMD160;
INIT_MD5;
INIT_FSEEK;
INIT_MD2;
INIT_SHA2;
INIT_VIS;
INIT_CDB;
INIT_GETFSENT;
INIT_ARC4RANDOM;
INIT_POPEN;
INIT_POPENVE;
INIT_PCLOSE;
INIT_FUNOPEN;
INIT_FUNOPEN2;
INIT_FDEVNAME;
INIT_GETUSERSHELL;
INIT_SL_INIT;
INIT_GETRANDOM;
INIT_CRYPT;
INIT_CRYPT_R;
INIT_GETENTROPY;
INIT_QSORT;
INIT_QSORT_R;
INIT_BSEARCH;
INIT_SIGALTSTACK;
INIT_PROCCTL;
INIT_UNAME;
INIT___XUNAME;
INIT___PRINTF_CHK;
}
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 589b5c3b2241..5f62b974170f 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -1,1231 +1,1231 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___CONFIG
#define _LIBCPP___CONFIG
#include <__config_site>
#if defined(_MSC_VER) && !defined(__clang__)
# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# define _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
# endif
#endif
#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
# pragma GCC system_header
#endif
#if defined(__apple_build_version__)
# define _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_APPLE_CLANG_VER (__apple_build_version__ / 10000)
#elif defined(__clang__)
# define _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_CLANG_VER (__clang_major__ * 100 + __clang_minor__)
#elif defined(__GNUC__)
# define _LIBCPP_COMPILER_GCC
#elif defined(_MSC_VER)
# define _LIBCPP_COMPILER_MSVC
#endif
#ifdef __cplusplus
-# define _LIBCPP_VERSION 15003
+# define _LIBCPP_VERSION 15006
# define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
# define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
// Valid C++ identifier that revs with every libc++ version. This can be used to
// generate identifiers that must be unique for every released libc++ version.
# define _LIBCPP_VERSIONED_IDENTIFIER _LIBCPP_CONCAT(v, _LIBCPP_VERSION)
# if __STDC_HOSTED__ == 0
# define _LIBCPP_FREESTANDING
# endif
# ifndef _LIBCPP_STD_VER
# if __cplusplus <= 201103L
# define _LIBCPP_STD_VER 11
# elif __cplusplus <= 201402L
# define _LIBCPP_STD_VER 14
# elif __cplusplus <= 201703L
# define _LIBCPP_STD_VER 17
# elif __cplusplus <= 202002L
# define _LIBCPP_STD_VER 20
# else
# define _LIBCPP_STD_VER 22 // current year, or date of c++2b ratification
# endif
# endif // _LIBCPP_STD_VER
# if defined(__ELF__)
# define _LIBCPP_OBJECT_FORMAT_ELF 1
# elif defined(__MACH__)
# define _LIBCPP_OBJECT_FORMAT_MACHO 1
# elif defined(_WIN32)
# define _LIBCPP_OBJECT_FORMAT_COFF 1
# elif defined(__wasm__)
# define _LIBCPP_OBJECT_FORMAT_WASM 1
# elif defined(_AIX)
# define _LIBCPP_OBJECT_FORMAT_XCOFF 1
# else
// ... add new file formats here ...
# endif
# if _LIBCPP_ABI_VERSION >= 2
// Change short string representation so that string data starts at offset 0,
// improving its alignment in some cases.
# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
// Fix deque iterator type in order to support incomplete types.
# define _LIBCPP_ABI_INCOMPLETE_TYPES_IN_DEQUE
// Fix undefined behavior in how std::list stores its linked nodes.
# define _LIBCPP_ABI_LIST_REMOVE_NODE_POINTER_UB
// Fix undefined behavior in how __tree stores its end and parent nodes.
# define _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB
// Fix undefined behavior in how __hash_table stores its pointer types.
# define _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB
# define _LIBCPP_ABI_FORWARD_LIST_REMOVE_NODE_POINTER_UB
# define _LIBCPP_ABI_FIX_UNORDERED_CONTAINER_SIZE_TYPE
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
// Override the default return value of exception::what() for
// bad_function_call::what() with a string that is specific to
// bad_function_call (see http://wg21.link/LWG2233). This is an ABI break
// because it changes the vtable layout of bad_function_call.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
// Enable optimized version of __do_get_(un)signed which avoids redundant copies.
# define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET
// Give reverse_iterator<T> one data member of type T, not two.
// Also, in C++17 and later, don't derive iterator types from std::iterator.
# define _LIBCPP_ABI_NO_ITERATOR_BASES
// Use the smallest possible integer type to represent the index of the variant.
// Previously libc++ used "unsigned int" exclusively.
# define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION
// Unstable attempt to provide a more optimized std::function
# define _LIBCPP_ABI_OPTIMIZED_FUNCTION
// All the regex constants must be distinct and nonzero.
# define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO
// Re-worked external template instantiations for std::string with a focus on
// performance and fast-path inlining.
# define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION
// Enable clang::trivial_abi on std::unique_ptr.
# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI
// Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr
# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI
// std::random_device holds some state when it uses an implementation that gets
// entropy from a file (see _LIBCPP_USING_DEV_RANDOM). When switching from this
// implementation to another one on a platform that has already shipped
// std::random_device, one needs to retain the same object layout to remain ABI
// compatible. This switch removes these workarounds for platforms that don't care
// about ABI compatibility.
# define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT
// Don't export the legacy __basic_string_common class and its methods from the built library.
# define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
// Don't export the legacy __vector_base_common class and its methods from the built library.
# define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
// According to the Standard, `bitset::operator[] const` returns bool
# define _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL
// Remove the base 10 implementation of std::to_chars from the dylib.
// The implementation moved to the header, but we still export the symbols from
// the dylib for backwards compatibility.
# define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
# elif _LIBCPP_ABI_VERSION == 1
# if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF))
// Enable compiling copies of now inline methods into the dylib to support
// applications compiled against older libraries. This is unnecessary with
// COFF dllexport semantics, since dllexport forces a non-inline definition
// of inline functions to be emitted anyway. Our own non-inline copy would
// conflict with the dllexport-emitted copy, so we disable it. For XCOFF,
// the linker will take issue with the symbols in the shared object if the
// weak inline methods get visibility (such as from -fvisibility-inlines-hidden),
// so disable it.
# define _LIBCPP_DEPRECATED_ABI_LEGACY_LIBRARY_DEFINITIONS_FOR_INLINE_FUNCTIONS
# endif
// Feature macros for disabling pre ABI v1 features. All of these options
// are deprecated.
# if defined(__FreeBSD__)
# define _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR
# endif
# endif
# if defined(_LIBCPP_BUILDING_LIBRARY) || _LIBCPP_ABI_VERSION >= 2
// Enable additional explicit instantiations of iostreams components. This
// reduces the number of weak definitions generated in programs that use
// iostreams by providing a single strong definition in the shared library.
# define _LIBCPP_ABI_ENABLE_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1
// Define a key function for `bad_function_call` in the library, to centralize
// its vtable and typeinfo to libc++ rather than having all other libraries
// using that class define their own copies.
# define _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
# endif
# define _LIBCPP_TOSTRING2(x) # x
# define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x)
# if __cplusplus < 201103L
# define _LIBCPP_CXX03_LANG
# endif
# ifndef __has_attribute
# define __has_attribute(__x) 0
# endif
# ifndef __has_builtin
# define __has_builtin(__x) 0
# endif
# ifndef __has_extension
# define __has_extension(__x) 0
# endif
# ifndef __has_feature
# define __has_feature(__x) 0
# endif
# ifndef __has_cpp_attribute
# define __has_cpp_attribute(__x) 0
# endif
// '__is_identifier' returns '0' if '__x' is a reserved identifier provided by
// the compiler and '1' otherwise.
# ifndef __is_identifier
# define __is_identifier(__x) 1
# endif
# ifndef __has_declspec_attribute
# define __has_declspec_attribute(__x) 0
# endif
# define __has_keyword(__x) !(__is_identifier(__x))
# ifndef __has_include
# define __has_include(...) 0
# endif
# if !defined(_LIBCPP_COMPILER_CLANG_BASED) && __cplusplus < 201103L
# error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11"
# endif
# ifdef _LIBCPP_COMPILER_MSVC
# error If you successfully use libc++ with MSVC please tell the libc++ developers and consider upstreaming your \
changes. We are not aware of anybody using this configuration and know that at least some code is currently broken. \
If there are users of this configuration we are happy to provide support.
# endif
// FIXME: ABI detection should be done via compiler builtin macros. This
// is just a placeholder until Clang implements such macros. For now assume
// that Windows compilers pretending to be MSVC++ target the Microsoft ABI,
// and allow the user to explicitly specify the ABI to handle cases where this
// heuristic falls short.
# if defined(_LIBCPP_ABI_FORCE_ITANIUM) && defined(_LIBCPP_ABI_FORCE_MICROSOFT)
# error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be defined"
# elif defined(_LIBCPP_ABI_FORCE_ITANIUM)
# define _LIBCPP_ABI_ITANIUM
# elif defined(_LIBCPP_ABI_FORCE_MICROSOFT)
# define _LIBCPP_ABI_MICROSOFT
# else
# if defined(_WIN32) && defined(_MSC_VER)
# define _LIBCPP_ABI_MICROSOFT
# else
# define _LIBCPP_ABI_ITANIUM
# endif
# endif
# if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME)
# define _LIBCPP_ABI_VCRUNTIME
# endif
# if __has_feature(experimental_library)
# ifndef _LIBCPP_ENABLE_EXPERIMENTAL
# define _LIBCPP_ENABLE_EXPERIMENTAL
# endif
# endif
// Incomplete features get their own specific disabling flags. This makes it
// easier to grep for target specific flags once the feature is complete.
# if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY)
# define _LIBCPP_HAS_NO_INCOMPLETE_FORMAT
# define _LIBCPP_HAS_NO_INCOMPLETE_RANGES
# endif
// Need to detect which libc we're using if we're on Linux.
# if defined(__linux__)
# include <features.h>
# if defined(__GLIBC_PREREQ)
# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b)
# else
# define _LIBCPP_GLIBC_PREREQ(a, b) 0
# endif // defined(__GLIBC_PREREQ)
# endif // defined(__linux__)
# if defined(__MVS__)
# include <features.h> // for __NATIVE_ASCII_F
# endif
# ifdef __LITTLE_ENDIAN__
# if __LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
# endif // __LITTLE_ENDIAN__
# endif // __LITTLE_ENDIAN__
# ifdef __BIG_ENDIAN__
# if __BIG_ENDIAN__
# define _LIBCPP_BIG_ENDIAN
# endif // __BIG_ENDIAN__
# endif // __BIG_ENDIAN__
# ifdef __BYTE_ORDER__
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define _LIBCPP_LITTLE_ENDIAN
# elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define _LIBCPP_BIG_ENDIAN
# endif // __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# endif // __BYTE_ORDER__
# ifdef __FreeBSD__
# include <sys/endian.h>
# include <osreldate.h>
# if _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
# endif // __FreeBSD__
# if defined(__NetBSD__) || defined(__OpenBSD__)
# include <sys/endian.h>
# if _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else // _BYTE_ORDER == _LITTLE_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# endif // _BYTE_ORDER == _LITTLE_ENDIAN
# endif // defined(__NetBSD__) || defined(__OpenBSD__)
# if defined(_WIN32)
# define _LIBCPP_WIN32API
# define _LIBCPP_LITTLE_ENDIAN
# define _LIBCPP_SHORT_WCHAR 1
// Both MinGW and native MSVC provide a "MSVC"-like environment
# define _LIBCPP_MSVCRT_LIKE
// If MinGW is not explicitly detected, assume the MS C runtime is used only if
// an MS compatibility version is specified.
# if defined(_MSC_VER) && !defined(__MINGW32__)
# define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library
# endif
# if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__))
# define _LIBCPP_HAS_BITSCAN64
# endif
# define _LIBCPP_HAS_OPEN_WITH_WCHAR
# endif // defined(_WIN32)
# ifdef __sun__
# include <sys/isa_defs.h>
# ifdef _LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# else
# define _LIBCPP_BIG_ENDIAN
# endif
# endif // __sun__
# if defined(_AIX) && !defined(__64BIT__)
// The size of wchar_t is 2 bytes in 32-bit mode on AIX.
# define _LIBCPP_SHORT_WCHAR 1
# endif
// Libc++ supports various implementations of std::random_device.
//
// _LIBCPP_USING_DEV_RANDOM
// Read entropy from the given file, by default `/dev/urandom`.
// If a token is provided, it is assumed to be the path to a file
// to read entropy from. This is the default behavior if nothing
// else is specified. This implementation requires storing state
// inside `std::random_device`.
//
// _LIBCPP_USING_ARC4_RANDOM
// Use arc4random(). This allows obtaining random data even when
// using sandboxing mechanisms. On some platforms like Apple, this
// is the recommended source of entropy for user-space programs.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_GETENTROPY
// Use getentropy().
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_FUCHSIA_CPRNG
// Use Fuchsia's zx_cprng_draw() system call, which is specified to
// deliver high-quality entropy and cannot fail.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_NACL_RANDOM
// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access,
// including accesses to the special files under `/dev`. This implementation
// uses the NaCl syscall `nacl_secure_random_init()` to get entropy.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
//
// _LIBCPP_USING_WIN32_RANDOM
// Use rand_s(), for use on Windows.
// When this option is used, the token passed to `std::random_device`'s
// constructor *must* be "/dev/urandom" -- anything else is an error.
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__DragonFly__) || defined(__sun__)
# define _LIBCPP_USING_ARC4_RANDOM
# elif defined(__wasi__) || defined(__EMSCRIPTEN__)
# define _LIBCPP_USING_GETENTROPY
# elif defined(__Fuchsia__)
# define _LIBCPP_USING_FUCHSIA_CPRNG
# elif defined(__native_client__)
# define _LIBCPP_USING_NACL_RANDOM
# elif defined(_LIBCPP_WIN32API)
# define _LIBCPP_USING_WIN32_RANDOM
# else
# define _LIBCPP_USING_DEV_RANDOM
# endif
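// A minimal usage sketch (illustrative, not part of this header): whichever
// backend macro is selected above, user code only ever sees std::random_device.
// With the arc4random/getentropy/Fuchsia/NaCl/Win32 backends the constructor
// token must be "/dev/urandom"; only _LIBCPP_USING_DEV_RANDOM treats the token
// as an actual file path to read entropy from.
//
//   #include <random>
//   unsigned int draw_seed() {
//     std::random_device rd;   // default token is "/dev/urandom"
//     return rd();             // one unsigned int worth of entropy
//   }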
# if !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN)
# include <endian.h>
# if __BYTE_ORDER == __LITTLE_ENDIAN
# define _LIBCPP_LITTLE_ENDIAN
# elif __BYTE_ORDER == __BIG_ENDIAN
# define _LIBCPP_BIG_ENDIAN
# else // __BYTE_ORDER == __BIG_ENDIAN
# error unable to determine endian
# endif
# endif // !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN)
# if __has_attribute(__no_sanitize__) && !defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_NO_CFI __attribute__((__no_sanitize__("cfi")))
# else
# define _LIBCPP_NO_CFI
# endif
# ifndef _LIBCPP_CXX03_LANG
# define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp)
# define _ALIGNAS_TYPE(x) alignas(x)
# define _ALIGNAS(x) alignas(x)
# define _LIBCPP_NORETURN [[noreturn]]
# define _NOEXCEPT noexcept
# define _NOEXCEPT_(x) noexcept(x)
# else
# define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp)
# define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x))))
# define _ALIGNAS(x) __attribute__((__aligned__(x)))
# define _LIBCPP_NORETURN __attribute__((noreturn))
# define _LIBCPP_HAS_NO_NOEXCEPT
# define nullptr __nullptr
# define _NOEXCEPT throw()
# define _NOEXCEPT_(x)
typedef __char16_t char16_t;
typedef __char32_t char32_t;
# endif
# if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L
# define _LIBCPP_NO_EXCEPTIONS
# endif
# define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp)
# if defined(_LIBCPP_COMPILER_CLANG_BASED)
# if defined(__APPLE__) && !defined(__i386__) && !defined(__x86_64__) && (!defined(__arm__) || __ARM_ARCH_7K__ >= 2)
# define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
# endif
// Objective-C++ features (opt-in)
# if __has_feature(objc_arc)
# define _LIBCPP_HAS_OBJC_ARC
# endif
# if __has_feature(objc_arc_weak)
# define _LIBCPP_HAS_OBJC_ARC_WEAK
# endif
# if __has_extension(blocks)
# define _LIBCPP_HAS_EXTENSION_BLOCKS
# endif
# if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) && defined(__APPLE__)
# define _LIBCPP_HAS_BLOCKS_RUNTIME
# endif
# if !__has_feature(address_sanitizer)
# define _LIBCPP_HAS_NO_ASAN
# endif
// Allow for build-time disabling of unsigned integer sanitization
# if __has_attribute(no_sanitize)
# define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK __attribute__((__no_sanitize__("unsigned-integer-overflow")))
# endif
# define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__))
# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
# elif defined(_LIBCPP_COMPILER_GCC)
# if !defined(__SANITIZE_ADDRESS__)
# define _LIBCPP_HAS_NO_ASAN
# endif
# define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__))
# define _LIBCPP_DISABLE_EXTENSION_WARNING __extension__
# elif defined(_LIBCPP_COMPILER_MSVC)
# define _LIBCPP_WARNING(x) __pragma(message(__FILE__ "(" _LIBCPP_TOSTRING(__LINE__) ") : warning note: " x))
# if _MSC_VER < 1900
# error "MSVC versions prior to Visual Studio 2015 are not supported"
# endif
# define _LIBCPP_NORETURN __declspec(noreturn)
# define _LIBCPP_WEAK
# define _LIBCPP_HAS_NO_ASAN
# define _LIBCPP_ALWAYS_INLINE __forceinline
# define _LIBCPP_HAS_NO_VECTOR_EXTENSION
# define _LIBCPP_DISABLE_EXTENSION_WARNING
# endif // _LIBCPP_COMPILER_[CLANG|GCC|MSVC]
# if defined(_LIBCPP_OBJECT_FORMAT_COFF)
# ifdef _DLL
# define _LIBCPP_CRT_FUNC __declspec(dllimport)
# else
# define _LIBCPP_CRT_FUNC
# endif
# if defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) || (defined(__MINGW32__) && !defined(_LIBCPP_BUILDING_LIBRARY))
# define _LIBCPP_DLL_VIS
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_EXPORTED_FROM_ABI
# elif defined(_LIBCPP_BUILDING_LIBRARY)
# define _LIBCPP_DLL_VIS __declspec(dllexport)
# if defined(__MINGW32__)
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# else
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS
# endif
# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport)
# else
# define _LIBCPP_DLL_VIS __declspec(dllimport)
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllimport)
# endif
# define _LIBCPP_TYPE_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_FUNC_VIS _LIBCPP_DLL_VIS
# define _LIBCPP_EXCEPTION_ABI _LIBCPP_DLL_VIS
# define _LIBCPP_HIDDEN
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
# define _LIBCPP_TEMPLATE_VIS
# define _LIBCPP_TEMPLATE_DATA_VIS
# define _LIBCPP_ENUM_VIS
# else
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# define _LIBCPP_VISIBILITY(vis) __attribute__((__visibility__(vis)))
# else
# define _LIBCPP_VISIBILITY(vis)
# endif
# define _LIBCPP_HIDDEN _LIBCPP_VISIBILITY("hidden")
# define _LIBCPP_FUNC_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_TYPE_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_TEMPLATE_DATA_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXCEPTION_ABI _LIBCPP_VISIBILITY("default")
# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_VISIBILITY("default")
# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS
// TODO: Make this a proper customization point or remove the option to override it.
# ifndef _LIBCPP_OVERRIDABLE_FUNC_VIS
# define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_VISIBILITY("default")
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
// The inline should be removed once PR32114 is resolved
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS inline _LIBCPP_HIDDEN
# else
# define _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# if __has_attribute(__type_visibility__)
# define _LIBCPP_TEMPLATE_VIS __attribute__((__type_visibility__("default")))
# else
# define _LIBCPP_TEMPLATE_VIS __attribute__((__visibility__("default")))
# endif
# else
# define _LIBCPP_TEMPLATE_VIS
# endif
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__)
# define _LIBCPP_ENUM_VIS __attribute__((__type_visibility__("default")))
# else
# define _LIBCPP_ENUM_VIS
# endif
# endif // defined(_LIBCPP_OBJECT_FORMAT_COFF)
# if __has_attribute(exclude_from_explicit_instantiation)
# define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION __attribute__((__exclude_from_explicit_instantiation__))
# else
// Try to approximate the effect of exclude_from_explicit_instantiation
// (which is that entities are not assumed to be provided by explicit
// template instantiations in the dylib) by always inlining those entities.
# define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE
# endif
// This macro marks a symbol as being hidden from libc++'s ABI. This is achieved
// on two levels:
// 1. The symbol is given hidden visibility, which ensures that users won't start exporting
// symbols from their dynamic library by means of using the libc++ headers. This ensures
// that those symbols stay private to the dynamic library in which they are defined.
//
// 2. The symbol is given an ABI tag that changes with each version of libc++. This ensures
// that no ODR violation can arise from mixing two TUs compiled with different versions
// of libc++ where we would have changed the definition of a symbol. If the symbols shared
// the same name, the ODR would require that their definitions be token-by-token equivalent,
// which basically prevents us from being able to make any change to any function in our
// headers. Using this ABI tag ensures that the symbol name is "bumped" artificially at
// each release, which lets us change the definition of these symbols at our leisure.
// Note that historically, this has been achieved in various ways, including force-inlining
// all functions or giving internal linkage to all functions. Both these (previous) solutions
// suffer from drawbacks that lead notably to code bloat.
//
// Note that we use _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION to ensure that we don't depend
// on _LIBCPP_HIDE_FROM_ABI methods of classes explicitly instantiated in the dynamic library.
//
// TODO: We provide an escape hatch with _LIBCPP_NO_ABI_TAG for folks who want to avoid increasing
// the length of symbols with an ABI tag. In practice, we should remove the escape hatch and
// use compression mangling instead, see https://github.com/itanium-cxx-abi/cxx-abi/issues/70.
# ifndef _LIBCPP_NO_ABI_TAG
# define _LIBCPP_HIDE_FROM_ABI \
_LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION \
__attribute__((__abi_tag__(_LIBCPP_TOSTRING(_LIBCPP_VERSIONED_IDENTIFIER))))
# else
# define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION
# endif
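// A minimal sketch (hypothetical code, not part of this header) of what the
// ABI tag achieves: the tag becomes part of the mangled name, so the same
// source-level name yields a fresh symbol in every tagged release.
__attribute__((__abi_tag__("v15000")))
inline int __tagged_identity(int __x) { return __x; }
// Itanium mangling: _Z17__tagged_identityB6v15000i -- the B6v15000 component
// changes whenever _LIBCPP_VERSIONED_IDENTIFIER is bumped, so definitions
// from two releases can never collide under one symbol.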
# ifdef _LIBCPP_BUILDING_LIBRARY
# if _LIBCPP_ABI_VERSION > 1
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
# else
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1
# endif
# else
# define _LIBCPP_HIDE_FROM_ABI_AFTER_V1 _LIBCPP_HIDE_FROM_ABI
# endif
// Just so we can migrate to the new macros gradually.
# define _LIBCPP_INLINE_VISIBILITY _LIBCPP_HIDE_FROM_ABI
// Inline namespaces are available in Clang/GCC/MSVC regardless of C++ dialect.
// clang-format off
# define _LIBCPP_BEGIN_NAMESPACE_STD namespace std { inline namespace _LIBCPP_ABI_NAMESPACE {
# define _LIBCPP_END_NAMESPACE_STD }}
# define _VSTD std
_LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \
_LIBCPP_BEGIN_NAMESPACE_STD inline namespace __fs { namespace filesystem {
# else
# define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM \
_LIBCPP_BEGIN_NAMESPACE_STD namespace __fs { namespace filesystem {
# endif
# define _LIBCPP_END_NAMESPACE_FILESYSTEM _LIBCPP_END_NAMESPACE_STD }}
// clang-format on
# define _VSTD_FS std::__fs::filesystem
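// Illustrative expansion of the macros above, assuming the default ABI
// namespace __1 (_LIBCPP_ABI_NAMESPACE is typically derived from
// _LIBCPP_ABI_VERSION):
//   namespace std { inline namespace __1 { /* declarations */ } }
// Users spell entities as std::foo, but they mangle as std::__1::foo, which
// keeps libc++'s symbols distinct from those of any other standard library
// implementation in the same process.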
# if __has_attribute(__enable_if__)
# define _LIBCPP_PREFERRED_OVERLOAD __attribute__((__enable_if__(true, "")))
# endif
# ifndef __SIZEOF_INT128__
# define _LIBCPP_HAS_NO_INT128
# endif
# ifdef _LIBCPP_CXX03_LANG
# define static_assert(...) _Static_assert(__VA_ARGS__)
# define decltype(...) __decltype(__VA_ARGS__)
# endif // _LIBCPP_CXX03_LANG
# ifdef _LIBCPP_CXX03_LANG
# define _LIBCPP_CONSTEXPR
# else
# define _LIBCPP_CONSTEXPR constexpr
# endif
# ifndef __cpp_consteval
# define _LIBCPP_CONSTEVAL _LIBCPP_CONSTEXPR
# else
# define _LIBCPP_CONSTEVAL consteval
# endif
# ifdef __GNUC__
# define _LIBCPP_NOALIAS __attribute__((__malloc__))
# else
# define _LIBCPP_NOALIAS
# endif
# if __has_attribute(using_if_exists)
# define _LIBCPP_USING_IF_EXISTS __attribute__((using_if_exists))
# else
# define _LIBCPP_USING_IF_EXISTS
# endif
# ifdef _LIBCPP_CXX03_LANG
# define _LIBCPP_DECLARE_STRONG_ENUM(x) \
struct _LIBCPP_TYPE_VIS x { \
enum __lx
// clang-format off
# define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \
__lx __v_; \
_LIBCPP_INLINE_VISIBILITY x(__lx __v) : __v_(__v) {} \
_LIBCPP_INLINE_VISIBILITY explicit x(int __v) : __v_(static_cast<__lx>(__v)) {} \
_LIBCPP_INLINE_VISIBILITY operator int() const { return __v_; } \
};
// clang-format on
# else // _LIBCPP_CXX03_LANG
# define _LIBCPP_DECLARE_STRONG_ENUM(x) enum class _LIBCPP_ENUM_VIS x
# define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x)
# endif // _LIBCPP_CXX03_LANG
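// Sketch of how the pair is used (modeled on std::launch in <future>):
//   _LIBCPP_DECLARE_STRONG_ENUM(launch) { async = 1, deferred = 2 };
//   _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(launch)
// In C++11 and later this expands to `enum class launch { async = 1, ... };`.
// In C++03 it expands to the wrapper struct above, whose constructors and
// conversion operator imitate a scoped enum within the older language rules.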
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || defined(__sun__) || \
defined(__NetBSD__)
# define _LIBCPP_LOCALE__L_EXTENSIONS 1
# endif
# ifdef __FreeBSD__
# define _DECLARE_C99_LDBL_MATH 1
# endif
// If we are getting operator new from the MSVC CRT, then allocation overloads
// for align_val_t were added in 19.12, aka VS 2017 version 15.5.
# if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new)
// We're deferring to Microsoft's STL to provide aligned new et al. We don't
// have it unless the language feature test macro is defined.
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# elif defined(__MVS__)
# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION
# endif
# if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606)
# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION
# endif
# if defined(__APPLE__) || defined(__FreeBSD__)
# define _LIBCPP_HAS_DEFAULTRUNELOCALE
# endif
# if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun__)
# define _LIBCPP_WCTYPE_IS_MASK
# endif
# if _LIBCPP_STD_VER <= 17 || !defined(__cpp_char8_t)
# define _LIBCPP_HAS_NO_CHAR8_T
# endif
// Deprecation macros.
//
// Deprecation warnings are always enabled, except when users explicitly opt out
// by defining _LIBCPP_DISABLE_DEPRECATION_WARNINGS.
# if !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
# if __has_attribute(deprecated)
# define _LIBCPP_DEPRECATED __attribute__((deprecated))
# define _LIBCPP_DEPRECATED_(m) __attribute__((deprecated(m)))
# elif _LIBCPP_STD_VER > 11
# define _LIBCPP_DEPRECATED [[deprecated]]
# define _LIBCPP_DEPRECATED_(m) [[deprecated(m)]]
# else
# define _LIBCPP_DEPRECATED
# define _LIBCPP_DEPRECATED_(m)
# endif
# else
# define _LIBCPP_DEPRECATED
# define _LIBCPP_DEPRECATED_(m)
# endif
# if !defined(_LIBCPP_CXX03_LANG)
# define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX11
# endif
# if _LIBCPP_STD_VER > 11
# define _LIBCPP_DEPRECATED_IN_CXX14 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX14
# endif
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX17
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_IN_CXX20
# endif
# if !defined(_LIBCPP_HAS_NO_CHAR8_T)
# define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED
# else
# define _LIBCPP_DEPRECATED_WITH_CHAR8_T
# endif
// Macros to enter and leave a state where deprecation warnings are suppressed.
# if defined(_LIBCPP_COMPILER_CLANG_BASED) || defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH \
_Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated\"") \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
# define _LIBCPP_SUPPRESS_DEPRECATED_POP _Pragma("GCC diagnostic pop")
# else
# define _LIBCPP_SUPPRESS_DEPRECATED_PUSH
# define _LIBCPP_SUPPRESS_DEPRECATED_POP
# endif
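// Typical use inside the headers (sketch with a hypothetical type): refer to
// a deprecated entity deliberately without disturbing the user's settings.
//   _LIBCPP_SUPPRESS_DEPRECATED_PUSH
//   typedef __some_deprecated_type __compat_alias;
//   _LIBCPP_SUPPRESS_DEPRECATED_POP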
# if _LIBCPP_STD_VER <= 11
# define _LIBCPP_EXPLICIT_AFTER_CXX11
# else
# define _LIBCPP_EXPLICIT_AFTER_CXX11 explicit
# endif
# if _LIBCPP_STD_VER > 11
# define _LIBCPP_CONSTEXPR_AFTER_CXX11 constexpr
# else
# define _LIBCPP_CONSTEXPR_AFTER_CXX11
# endif
# if _LIBCPP_STD_VER > 14
# define _LIBCPP_CONSTEXPR_AFTER_CXX14 constexpr
# else
# define _LIBCPP_CONSTEXPR_AFTER_CXX14
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTEXPR_AFTER_CXX17 constexpr
# else
# define _LIBCPP_CONSTEXPR_AFTER_CXX17
# endif
# if __has_cpp_attribute(nodiscard) || defined(_LIBCPP_COMPILER_MSVC)
# define _LIBCPP_NODISCARD [[nodiscard]]
# elif defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(_LIBCPP_CXX03_LANG)
# define _LIBCPP_NODISCARD [[clang::warn_unused_result]]
# else
// We can't use GCC's [[gnu::warn_unused_result]] and
// __attribute__((warn_unused_result)) because GCC does not silence them via a
// (void) cast.
# define _LIBCPP_NODISCARD
# endif
// _LIBCPP_NODISCARD_EXT may be used to apply [[nodiscard]] to entities that the
// Standard does not specify as such, as a libc++ extension.
# if defined(_LIBCPP_ENABLE_NODISCARD) && !defined(_LIBCPP_DISABLE_NODISCARD_EXT)
# define _LIBCPP_NODISCARD_EXT _LIBCPP_NODISCARD
# else
# define _LIBCPP_NODISCARD_EXT
# endif
# if !defined(_LIBCPP_DISABLE_NODISCARD_AFTER_CXX17) && (_LIBCPP_STD_VER > 17 || defined(_LIBCPP_ENABLE_NODISCARD))
# define _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_NODISCARD
# else
# define _LIBCPP_NODISCARD_AFTER_CXX17
# endif
# if __has_attribute(no_destroy)
# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__))
# else
# define _LIBCPP_NO_DESTROY
# endif
# ifndef _LIBCPP_HAS_NO_ASAN
extern "C" _LIBCPP_FUNC_VIS void
__sanitizer_annotate_contiguous_container(const void*, const void*, const void*, const void*);
# endif
// Try to find out if RTTI is disabled.
# if !defined(__cpp_rtti) || __cpp_rtti < 199711L
# define _LIBCPP_NO_RTTI
# endif
# ifndef _LIBCPP_WEAK
# define _LIBCPP_WEAK __attribute__((__weak__))
# endif
// Thread API
// clang-format off
# if !defined(_LIBCPP_HAS_NO_THREADS) && \
!defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \
!defined(_LIBCPP_HAS_THREAD_API_WIN32) && \
!defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
# if defined(__FreeBSD__) || \
defined(__wasi__) || \
defined(__NetBSD__) || \
defined(__OpenBSD__) || \
defined(__NuttX__) || \
defined(__linux__) || \
defined(__GNU__) || \
defined(__APPLE__) || \
defined(__sun__) || \
defined(__MVS__) || \
defined(_AIX) || \
defined(__EMSCRIPTEN__)
// clang-format on
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(__Fuchsia__)
// TODO(44575): Switch to C11 thread API when possible.
# define _LIBCPP_HAS_THREAD_API_PTHREAD
# elif defined(_LIBCPP_WIN32API)
# define _LIBCPP_HAS_THREAD_API_WIN32
# else
# error "No thread API"
# endif // _LIBCPP_HAS_THREAD_API
# endif // _LIBCPP_HAS_NO_THREADS
# if defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# if defined(__ANDROID__) && __ANDROID_API__ >= 30
# define _LIBCPP_HAS_COND_CLOCKWAIT
# elif defined(_LIBCPP_GLIBC_PREREQ)
# if _LIBCPP_GLIBC_PREREQ(2, 30)
# define _LIBCPP_HAS_COND_CLOCKWAIT
# endif
# endif
# endif
# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD)
# error _LIBCPP_HAS_THREAD_API_PTHREAD may only be defined when \
_LIBCPP_HAS_NO_THREADS is not defined.
# endif
# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_EXTERNAL)
# error _LIBCPP_HAS_THREAD_API_EXTERNAL may not be defined when \
_LIBCPP_HAS_NO_THREADS is defined.
# endif
# if defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK) && !defined(_LIBCPP_HAS_NO_THREADS)
# error _LIBCPP_HAS_NO_MONOTONIC_CLOCK may only be defined when \
_LIBCPP_HAS_NO_THREADS is defined.
# endif
# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__)
# define __STDCPP_THREADS__ 1
# endif
// The glibc and Bionic implementations of pthreads implement
// pthread_mutex_destroy as a no-op for regular mutexes. Additionally, Win32
// mutexes have no destroy mechanism.
//
// This optimization can't be performed on Apple platforms, where
// pthread_mutex_destroy can allow the kernel to release resources.
// See https://llvm.org/D64298 for details.
//
// TODO(EricWF): Enable this optimization on Bionic after speaking to their
// respective stakeholders.
// clang-format off
# if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || \
(defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || \
defined(_LIBCPP_HAS_THREAD_API_WIN32)
// clang-format on
# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION
# endif
// Destroying a condvar is a nop on Windows.
//
// This optimization can't be performed on Apple platforms, where
// pthread_cond_destroy can allow the kernel to release resources.
// See https://llvm.org/D64298 for details.
//
// TODO(EricWF): This is potentially true for some pthread implementations
// as well.
# if (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || defined(_LIBCPP_HAS_THREAD_API_WIN32)
# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION
# endif
// Some systems do not provide gets() in their C library, for security reasons.
# if defined(_LIBCPP_MSVCRT) || (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || defined(__OpenBSD__)
# define _LIBCPP_C_HAS_NO_GETS
# endif
# if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \
defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
# define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
# endif
# if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic)
# define _LIBCPP_HAS_C_ATOMIC_IMP
# elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_HAS_GCC_ATOMIC_IMP
# endif
# if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \
!defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP)
# define _LIBCPP_HAS_NO_ATOMIC_HEADER
# else
# ifndef _LIBCPP_ATOMIC_FLAG_TYPE
# define _LIBCPP_ATOMIC_FLAG_TYPE bool
# endif
# ifdef _LIBCPP_FREESTANDING
# define _LIBCPP_ATOMIC_ONLY_USE_BUILTINS
# endif
# endif
# ifndef _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
# define _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
# endif
# if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS)
# if defined(__clang__) && __has_attribute(acquire_capability)
// Work around the attribute handling in clang. When both __declspec and
// __attribute__ are present, the processing goes awry, preventing the definition
// of the types. In MinGW mode, __declspec evaluates to __attribute__, and thus
// combining the two does work.
# if !defined(_MSC_VER)
# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS
# endif
# endif
# endif
# ifdef _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS
# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x))
# else
# define _LIBCPP_THREAD_SAFETY_ANNOTATION(x)
# endif
# if _LIBCPP_STD_VER > 17
# define _LIBCPP_CONSTINIT constinit
# elif __has_attribute(require_constant_initialization)
# define _LIBCPP_CONSTINIT __attribute__((__require_constant_initialization__))
# else
# define _LIBCPP_CONSTINIT
# endif
# if __has_attribute(diagnose_if) && !defined(_LIBCPP_DISABLE_ADDITIONAL_DIAGNOSTICS)
# define _LIBCPP_DIAGNOSE_WARNING(...) __attribute__((diagnose_if(__VA_ARGS__, "warning")))
# define _LIBCPP_DIAGNOSE_ERROR(...) __attribute__((diagnose_if(__VA_ARGS__, "error")))
# else
# define _LIBCPP_DIAGNOSE_WARNING(...)
# define _LIBCPP_DIAGNOSE_ERROR(...)
# endif
// Use a function like macro to imply that it must be followed by a semicolon
# if __has_cpp_attribute(fallthrough)
# define _LIBCPP_FALLTHROUGH() [[fallthrough]]
# elif __has_attribute(__fallthrough__)
# define _LIBCPP_FALLTHROUGH() __attribute__((__fallthrough__))
# else
# define _LIBCPP_FALLTHROUGH() ((void)0)
# endif
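// A sketch of a use site (hypothetical function): the trailing () forces a
// semicolon, so the macro reads as a statement in every configuration,
// including the no-op one.
inline int __count_weight(int __c) {
    int __weight = 0;
    switch (__c) {
    case 'a':
        ++__weight; // deliberately falls into the next case
        _LIBCPP_FALLTHROUGH();
    case 'b':
        return __weight + 1;
    default:
        return 0;
    }
}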
# if __has_attribute(__nodebug__)
# define _LIBCPP_NODEBUG __attribute__((__nodebug__))
# else
# define _LIBCPP_NODEBUG
# endif
# if __has_attribute(__standalone_debug__)
# define _LIBCPP_STANDALONE_DEBUG __attribute__((__standalone_debug__))
# else
# define _LIBCPP_STANDALONE_DEBUG
# endif
# if __has_attribute(__preferred_name__)
# define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x)))
# else
# define _LIBCPP_PREFERRED_NAME(x)
# endif
// We often repeat things just for handling wide characters in the library.
// When wide characters are disabled, it can be useful to have a quick way of
// disabling that code without having to resort to #if-#endif, which has a
// larger impact on readability.
# if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)
# define _LIBCPP_IF_WIDE_CHARACTERS(...)
# else
# define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__
# endif
# if defined(_LIBCPP_ABI_MICROSOFT) && (defined(_LIBCPP_COMPILER_MSVC) || __has_declspec_attribute(empty_bases))
# define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases)
# else
# define _LIBCPP_DECLSPEC_EMPTY_BASES
# endif
# if defined(_LIBCPP_ENABLE_CXX17_REMOVED_FEATURES)
# define _LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR
# define _LIBCPP_ENABLE_CXX17_REMOVED_BINDERS
# define _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# endif // _LIBCPP_ENABLE_CXX17_REMOVED_FEATURES
// Leave the deprecation notices in by default, but don't remove unary_function and
// binary_function entirely just yet. That way, folks will have one release to act
// on the deprecation warnings.
# ifndef _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# define _LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION
# endif
# if defined(_LIBCPP_ENABLE_CXX20_REMOVED_FEATURES)
# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS
# define _LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_VOID_SPECIALIZATION
# define _LIBCPP_ENABLE_CXX20_REMOVED_BINDER_TYPEDEFS
# define _LIBCPP_ENABLE_CXX20_REMOVED_NEGATORS
# define _LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR
# define _LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS
# endif // _LIBCPP_ENABLE_CXX20_REMOVED_FEATURES
# if !defined(__cpp_impl_coroutine) || __cpp_impl_coroutine < 201902L
# define _LIBCPP_HAS_NO_CXX20_COROUTINES
# endif
# define _LIBCPP_PUSH_MACROS _Pragma("push_macro(\"min\")") _Pragma("push_macro(\"max\")")
# define _LIBCPP_POP_MACROS _Pragma("pop_macro(\"min\")") _Pragma("pop_macro(\"max\")")
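// The standard pattern in each header (sketch): save the user's min/max
// macros, strip them for the duration of the header, then restore them.
//   _LIBCPP_PUSH_MACROS
//   #include <__undef_macros>
//   // ... header body can now use min/max as ordinary identifiers ...
//   _LIBCPP_POP_MACROS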
# ifndef _LIBCPP_NO_AUTO_LINK
# if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY)
# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS)
# pragma comment(lib, "c++.lib")
# else
# pragma comment(lib, "libc++.lib")
# endif
# endif // defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_BUILDING_LIBRARY)
# endif // _LIBCPP_NO_AUTO_LINK
// Configures the fopen close-on-exec mode character, if any. This string will
// be appended to any mode string used by fstream for fopen/fdopen.
//
// Not all platforms support this, but it helps avoid fd-leaks on platforms that
// do.
# if defined(__BIONIC__)
# define _LIBCPP_FOPEN_CLOEXEC_MODE "e"
# else
# define _LIBCPP_FOPEN_CLOEXEC_MODE
# endif
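// Sketch of how the mode character gets spliced in: adjacent string-literal
// concatenation appends the macro to the caller's mode string.
//   FILE* __fp = fopen(__name, "rb" _LIBCPP_FOPEN_CLOEXEC_MODE);
//   // expands to fopen(__name, "rbe") on Bionic, fopen(__name, "rb") elsewhere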
// Support for _FILE_OFFSET_BITS=64 landed gradually in Android, so the full set
// of functions used in cstdio may not be available for low API levels when
// using 64-bit file offsets on LP32.
# if defined(__BIONIC__) && defined(__USE_FILE_OFFSET64) && __ANDROID_API__ < 24
# define _LIBCPP_HAS_NO_FGETPOS_FSETPOS
# endif
# if __has_attribute(init_priority)
// TODO: Remove this once we drop support for building libc++ with old Clangs
# if (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER < 1200) || \
(defined(__apple_build_version__) && __apple_build_version__ < 13000000)
# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(101)))
# else
# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(100)))
# endif
# else
# define _LIBCPP_INIT_PRIORITY_MAX
# endif
# if defined(__GNUC__) || defined(__clang__)
// The attribute uses 1-based indices for ordinary functions and static member
// functions, and 2-based indices for non-static member functions, because the
// implicit `this` parameter counts as argument 1.
# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) \
__attribute__((__format__(archetype, format_string_index, first_format_arg_index)))
# else
# define _LIBCPP_ATTRIBUTE_FORMAT(archetype, format_string_index, first_format_arg_index) /* nothing */
# endif
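// Sketch of the index shift (hypothetical declarations): in a non-static
// member function the implicit `this` occupies position 1, so the format
// string and the first variadic argument each move up by one.
struct __log_sink {
    _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 3)
    void __log(const char* __fmt, ...); // __fmt is argument 2, varargs start at 3
};
_LIBCPP_ATTRIBUTE_FORMAT(__printf__, 1, 2)
void __log_free(const char* __fmt, ...); // __fmt is argument 1, varargs start at 2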
# if __has_cpp_attribute(msvc::no_unique_address)
// MSVC implements [[no_unique_address]] as a silent no-op currently.
// (If/when MSVC breaks its C++ ABI, it will be changed to work as intended.)
// However, MSVC implements [[msvc::no_unique_address]], which does what
// [[no_unique_address]] is supposed to do in general.
// Clang-cl does not yet (14.0) implement either [[no_unique_address]] or
// [[msvc::no_unique_address]]. If/when it does implement
// [[msvc::no_unique_address]], that form should be preferred.
# define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]]
# elif __has_cpp_attribute(no_unique_address)
# define _LIBCPP_NO_UNIQUE_ADDRESS [[no_unique_address]]
# else
# define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */
// Note that this can be replaced by #error as soon as clang-cl
// implements msvc::no_unique_address, since there should be no C++20
// compiler that doesn't support one of the two attributes at that point.
// We generally don't want to use this macro outside of C++20-only code,
// because using it conditionally in one language version only would make
// the ABI inconsistent.
# endif
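// What the attribute enables (sketch with hypothetical types): an empty
// member, such as a stateless allocator, can overlap another member instead
// of adding padding.
struct __no_state {};
struct __packed_pair {
    _LIBCPP_NO_UNIQUE_ADDRESS __no_state __first_;
    void* __second_;
};
// Where the attribute is honored, sizeof(__packed_pair) == sizeof(void*);
// with the empty fallback above, the struct grows by another word.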
# ifdef _LIBCPP_COMPILER_CLANG_BASED
# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push")
# define _LIBCPP_DIAGNOSTIC_POP _Pragma("clang diagnostic pop")
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(clang diagnostic ignored str))
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
# elif defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push")
# define _LIBCPP_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop")
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str)
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str) _Pragma(_LIBCPP_TOSTRING(GCC diagnostic ignored str))
# else
# define _LIBCPP_DIAGNOSTIC_PUSH
# define _LIBCPP_DIAGNOSTIC_POP
# define _LIBCPP_CLANG_DIAGNOSTIC_IGNORED(str)
# define _LIBCPP_GCC_DIAGNOSTIC_IGNORED(str)
# endif
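// One push/pop block serves both compilers (sketch), since each
// *_DIAGNOSTIC_IGNORED macro expands to nothing on the other compiler:
//   _LIBCPP_DIAGNOSTIC_PUSH
//   _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated")
//   _LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations")
//   // ... code that intentionally uses a deprecated entity ...
//   _LIBCPP_DIAGNOSTIC_POP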
# if defined(_AIX) && !defined(_LIBCPP_COMPILER_GCC)
# define _LIBCPP_PACKED_BYTE_FOR_AIX _Pragma("pack(1)")
# define _LIBCPP_PACKED_BYTE_FOR_AIX_END _Pragma("pack(pop)")
# else
# define _LIBCPP_PACKED_BYTE_FOR_AIX /* empty */
# define _LIBCPP_PACKED_BYTE_FOR_AIX_END /* empty */
# endif
# if __has_attribute(__packed__)
# define _LIBCPP_PACKED __attribute__((__packed__))
# else
# define _LIBCPP_PACKED
# endif
#endif // __cplusplus
#endif // _LIBCPP___CONFIG
diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h
index db3af6e24101..55b607f3f804 100644
--- a/libcxx/include/__functional/function.h
+++ b/libcxx/include/__functional/function.h
@@ -1,2813 +1,2823 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP___FUNCTIONAL_FUNCTION_H
#define _LIBCPP___FUNCTIONAL_FUNCTION_H
#include <__assert>
#include <__config>
#include <__functional/binary_function.h>
#include <__functional/invoke.h>
#include <__functional/unary_function.h>
#include <__iterator/iterator_traits.h>
#include <__memory/addressof.h>
#include <__memory/allocator_traits.h>
#include <__memory/compressed_pair.h>
#include <__memory/shared_ptr.h>
#include <__utility/forward.h>
#include <__utility/move.h>
#include <__utility/swap.h>
#include <exception>
#include <memory> // TODO: replace with <__memory/__builtin_new_allocator.h>
#include <type_traits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
_LIBCPP_BEGIN_NAMESPACE_STD
// bad_function_call
_LIBCPP_DIAGNOSTIC_PUSH
_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables")
class _LIBCPP_EXCEPTION_ABI bad_function_call
: public exception
{
public:
// Note that when a key function is not used, every translation unit that uses
// bad_function_call will end up containing a weak definition of the vtable and
// typeinfo.
#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_KEY_FUNCTION
virtual ~bad_function_call() _NOEXCEPT;
#else
virtual ~bad_function_call() _NOEXCEPT {}
#endif
#ifdef _LIBCPP_ABI_BAD_FUNCTION_CALL_GOOD_WHAT_MESSAGE
virtual const char* what() const _NOEXCEPT;
#endif
};
_LIBCPP_DIAGNOSTIC_POP
_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY
void __throw_bad_function_call()
{
#ifndef _LIBCPP_NO_EXCEPTIONS
throw bad_function_call();
#else
_VSTD::abort();
#endif
}
#if defined(_LIBCPP_CXX03_LANG) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) && __has_attribute(deprecated)
# define _LIBCPP_DEPRECATED_CXX03_FUNCTION \
__attribute__((deprecated("Using std::function in C++03 is not supported anymore. Please upgrade to C++11 or later, or use a different type")))
#else
# define _LIBCPP_DEPRECATED_CXX03_FUNCTION /* nothing */
#endif
template<class _Fp> class _LIBCPP_DEPRECATED_CXX03_FUNCTION _LIBCPP_TEMPLATE_VIS function; // undefined
namespace __function
{
template<class _Rp>
struct __maybe_derive_from_unary_function
{
};
template<class _Rp, class _A1>
struct __maybe_derive_from_unary_function<_Rp(_A1)>
: public __unary_function<_A1, _Rp>
{
};
template<class _Rp>
struct __maybe_derive_from_binary_function
{
};
template<class _Rp, class _A1, class _A2>
struct __maybe_derive_from_binary_function<_Rp(_A1, _A2)>
: public __binary_function<_A1, _A2, _Rp>
{
};
template <class _Fp>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Fp const&) { return true; }
template <class _Fp>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Fp* __ptr) { return __ptr; }
template <class _Ret, class _Class>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Ret _Class::*__ptr) { return __ptr; }
template <class _Fp>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(function<_Fp> const& __f) { return !!__f; }
#ifdef _LIBCPP_HAS_EXTENSION_BLOCKS
template <class _Rp, class ..._Args>
_LIBCPP_INLINE_VISIBILITY
bool __not_null(_Rp (^__p)(_Args...)) { return __p; }
#endif
} // namespace __function
#ifndef _LIBCPP_CXX03_LANG
namespace __function {
// __alloc_func holds a functor and an allocator.
template <class _Fp, class _Ap, class _FB> class __alloc_func;
template <class _Fp, class _FB>
class __default_alloc_func;
template <class _Fp, class _Ap, class _Rp, class... _ArgTypes>
class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)>
{
__compressed_pair<_Fp, _Ap> __f_;
public:
typedef _LIBCPP_NODEBUG _Fp _Target;
typedef _LIBCPP_NODEBUG _Ap _Alloc;
_LIBCPP_INLINE_VISIBILITY
const _Target& __target() const { return __f_.first(); }
// WIN32 APIs may define __allocator, so use __get_allocator instead.
_LIBCPP_INLINE_VISIBILITY
const _Alloc& __get_allocator() const { return __f_.second(); }
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(_Target&& __f)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)),
_VSTD::forward_as_tuple())
{
}
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(const _Target& __f, const _Alloc& __a)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(__f),
_VSTD::forward_as_tuple(__a))
{
}
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(const _Target& __f, _Alloc&& __a)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(__f),
_VSTD::forward_as_tuple(_VSTD::move(__a)))
{
}
_LIBCPP_INLINE_VISIBILITY
explicit __alloc_func(_Target&& __f, _Alloc&& __a)
: __f_(piecewise_construct, _VSTD::forward_as_tuple(_VSTD::move(__f)),
_VSTD::forward_as_tuple(_VSTD::move(__a)))
{
}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __arg)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(),
_VSTD::forward<_ArgTypes>(__arg)...);
}
_LIBCPP_INLINE_VISIBILITY
__alloc_func* __clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef
typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type
_AA;
_AA __a(__f_.second());
typedef __allocator_destructor<_AA> _Dp;
unique_ptr<__alloc_func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __alloc_func(__f_.first(), _Alloc(__a));
return __hold.release();
}
_LIBCPP_INLINE_VISIBILITY
void destroy() _NOEXCEPT { __f_.~__compressed_pair<_Target, _Alloc>(); }
static void __destroy_and_delete(__alloc_func* __f) {
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __alloc_func>::type
_FunAlloc;
_FunAlloc __a(__f->__get_allocator());
__f->destroy();
__a.deallocate(__f, 1);
}
};
template <class _Fp, class _Rp, class... _ArgTypes>
class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> {
_Fp __f_;
public:
typedef _LIBCPP_NODEBUG _Fp _Target;
_LIBCPP_INLINE_VISIBILITY
const _Target& __target() const { return __f_; }
_LIBCPP_INLINE_VISIBILITY
explicit __default_alloc_func(_Target&& __f) : __f_(_VSTD::move(__f)) {}
_LIBCPP_INLINE_VISIBILITY
explicit __default_alloc_func(const _Target& __f) : __f_(__f) {}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __arg) {
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_, _VSTD::forward<_ArgTypes>(__arg)...);
}
_LIBCPP_INLINE_VISIBILITY
__default_alloc_func* __clone() const {
__builtin_new_allocator::__holder_t __hold =
__builtin_new_allocator::__allocate_type<__default_alloc_func>(1);
__default_alloc_func* __res =
::new ((void*)__hold.get()) __default_alloc_func(__f_);
(void)__hold.release();
return __res;
}
_LIBCPP_INLINE_VISIBILITY
void destroy() _NOEXCEPT { __f_.~_Target(); }
static void __destroy_and_delete(__default_alloc_func* __f) {
__f->destroy();
__builtin_new_allocator::__deallocate_type<__default_alloc_func>(__f, 1);
}
};
// __base provides an abstract interface for copyable functors.
template<class _Fp> class _LIBCPP_TEMPLATE_VIS __base;
template<class _Rp, class ..._ArgTypes>
class __base<_Rp(_ArgTypes...)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
_LIBCPP_INLINE_VISIBILITY __base() {}
_LIBCPP_INLINE_VISIBILITY virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() _NOEXCEPT = 0;
virtual void destroy_deallocate() _NOEXCEPT = 0;
virtual _Rp operator()(_ArgTypes&& ...) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const _NOEXCEPT = 0;
virtual const std::type_info& target_type() const _NOEXCEPT = 0;
#endif // _LIBCPP_NO_RTTI
};
// __func implements __base for a given functor type.
template<class _FD, class _Alloc, class _FB> class __func;
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
class __func<_Fp, _Alloc, _Rp(_ArgTypes...)>
: public __base<_Rp(_ArgTypes...)>
{
__alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> __f_;
public:
_LIBCPP_INLINE_VISIBILITY
explicit __func(_Fp&& __f)
: __f_(_VSTD::move(__f)) {}
_LIBCPP_INLINE_VISIBILITY
explicit __func(const _Fp& __f, const _Alloc& __a)
: __f_(__f, __a) {}
_LIBCPP_INLINE_VISIBILITY
explicit __func(const _Fp& __f, _Alloc&& __a)
: __f_(__f, _VSTD::move(__a)) {}
_LIBCPP_INLINE_VISIBILITY
explicit __func(_Fp&& __f, _Alloc&& __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_ArgTypes...)>* __clone() const;
virtual void __clone(__base<_Rp(_ArgTypes...)>*) const;
virtual void destroy() _NOEXCEPT;
virtual void destroy_deallocate() _NOEXCEPT;
virtual _Rp operator()(_ArgTypes&&... __arg);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const _NOEXCEPT;
virtual const std::type_info& target_type() const _NOEXCEPT;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
__base<_Rp(_ArgTypes...)>*
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.__get_allocator());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.__target(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
void
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::__clone(__base<_Rp(_ArgTypes...)>* __p) const
{
::new ((void*)__p) __func(__f_.__target(), __f_.__get_allocator());
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
void
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy() _NOEXCEPT
{
__f_.destroy();
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
void
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::destroy_deallocate() _NOEXCEPT
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.__get_allocator());
__f_.destroy();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
_Rp
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&& ... __arg)
{
return __f_(_VSTD::forward<_ArgTypes>(__arg)...);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
const void*
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT
{
if (__ti == typeid(_Fp))
return _VSTD::addressof(__f_.__target());
return nullptr;
}
template<class _Fp, class _Alloc, class _Rp, class ..._ArgTypes>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target_type() const _NOEXCEPT
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
// __value_func creates a value-type from a __func.
template <class _Fp> class __value_func;
template <class _Rp, class... _ArgTypes> class __value_func<_Rp(_ArgTypes...)>
{
typename aligned_storage<3 * sizeof(void*)>::type __buf_;
typedef __base<_Rp(_ArgTypes...)> __func;
__func* __f_;
_LIBCPP_NO_CFI static __func* __as_base(void* __p)
{
return reinterpret_cast<__func*>(__p);
}
public:
_LIBCPP_INLINE_VISIBILITY
__value_func() _NOEXCEPT : __f_(nullptr) {}
template <class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY __value_func(_Fp&& __f, const _Alloc& __a)
: __f_(nullptr)
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef __function::__func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun;
typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type
_FunAlloc;
if (__function::__not_null(__f))
{
_FunAlloc __af(__a);
if (sizeof(_Fun) <= sizeof(__buf_) &&
is_nothrow_copy_constructible<_Fp>::value &&
is_nothrow_copy_constructible<_FunAlloc>::value)
{
__f_ =
::new ((void*)&__buf_) _Fun(_VSTD::move(__f), _Alloc(__af));
}
else
{
typedef __allocator_destructor<_FunAlloc> _Dp;
unique_ptr<__func, _Dp> __hold(__af.allocate(1), _Dp(__af, 1));
::new ((void*)__hold.get()) _Fun(_VSTD::move(__f), _Alloc(__a));
__f_ = __hold.release();
}
}
}
template <class _Fp,
class = typename enable_if<!is_same<typename decay<_Fp>::type, __value_func>::value>::type>
_LIBCPP_INLINE_VISIBILITY explicit __value_func(_Fp&& __f)
: __value_func(_VSTD::forward<_Fp>(__f), allocator<_Fp>()) {}
_LIBCPP_INLINE_VISIBILITY
__value_func(const __value_func& __f)
{
if (__f.__f_ == nullptr)
__f_ = nullptr;
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f_ = __as_base(&__buf_);
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
_LIBCPP_INLINE_VISIBILITY
__value_func(__value_func&& __f) _NOEXCEPT
{
if (__f.__f_ == nullptr)
__f_ = nullptr;
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f_ = __as_base(&__buf_);
__f.__f_->__clone(__f_);
}
else
{
__f_ = __f.__f_;
__f.__f_ = nullptr;
}
}
_LIBCPP_INLINE_VISIBILITY
~__value_func()
{
if ((void*)__f_ == &__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
_LIBCPP_INLINE_VISIBILITY
__value_func& operator=(__value_func&& __f)
{
*this = nullptr;
if (__f.__f_ == nullptr)
__f_ = nullptr;
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f_ = __as_base(&__buf_);
__f.__f_->__clone(__f_);
}
else
{
__f_ = __f.__f_;
__f.__f_ = nullptr;
}
return *this;
}
_LIBCPP_INLINE_VISIBILITY
__value_func& operator=(nullptr_t)
{
__func* __f = __f_;
__f_ = nullptr;
if ((void*)__f == &__buf_)
__f->destroy();
else if (__f)
__f->destroy_deallocate();
return *this;
}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __args) const
{
if (__f_ == nullptr)
__throw_bad_function_call();
return (*__f_)(_VSTD::forward<_ArgTypes>(__args)...);
}
_LIBCPP_INLINE_VISIBILITY
void swap(__value_func& __f) _NOEXCEPT
{
if (&__f == this)
return;
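// Three cases follow: both targets inline (rotate through a stack-local
// temporary), exactly one inline (clone it across and steal the heap
// pointer), or both heap-allocated (just swap the pointers).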
if ((void*)__f_ == &__buf_ && (void*)__f.__f_ == &__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__func* __t = __as_base(&__tempbuf);
__f_->__clone(__t);
__f_->destroy();
__f_ = nullptr;
__f.__f_->__clone(__as_base(&__buf_));
__f.__f_->destroy();
__f.__f_ = nullptr;
__f_ = __as_base(&__buf_);
__t->__clone(__as_base(&__f.__buf_));
__t->destroy();
__f.__f_ = __as_base(&__f.__buf_);
}
else if ((void*)__f_ == &__buf_)
{
__f_->__clone(__as_base(&__f.__buf_));
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = __as_base(&__f.__buf_);
}
else if ((void*)__f.__f_ == &__f.__buf_)
{
__f.__f_->__clone(__as_base(&__buf_));
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = __as_base(&__buf_);
}
else
_VSTD::swap(__f_, __f.__f_);
}
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const _NOEXCEPT { return __f_ != nullptr; }
#ifndef _LIBCPP_NO_RTTI
_LIBCPP_INLINE_VISIBILITY
const std::type_info& target_type() const _NOEXCEPT
{
if (__f_ == nullptr)
return typeid(void);
return __f_->target_type();
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT
{
if (__f_ == nullptr)
return nullptr;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
};
// Storage for a functor object, to be used with __policy to manage copy and
// destruction.
union __policy_storage
{
mutable char __small[sizeof(void*) * 2];
void* __large;
};
// True if _Fun can safely be held in __policy_storage.__small.
template <typename _Fun>
struct __use_small_storage
: public integral_constant<
bool, sizeof(_Fun) <= sizeof(__policy_storage) &&
_LIBCPP_ALIGNOF(_Fun) <= _LIBCPP_ALIGNOF(__policy_storage) &&
is_trivially_copy_constructible<_Fun>::value &&
is_trivially_destructible<_Fun>::value> {};
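// Consequence (illustrative check; in the header the trait is applied to the
// __alloc_func/__default_alloc_func wrappers, here to a raw pointer): a
// function pointer is trivial and pointer-sized, so it always qualifies for
// the small buffer, while large or non-trivially-copyable functors are
// routed to heap storage via __clone/__destroy.
static_assert(__use_small_storage<int (*)(int)>::value,
              "function pointers avoid allocation");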
// Policy contains information about how to copy, destroy, and move the
// underlying functor. You can think of it as a vtable of sorts.
struct __policy
{
// Used to copy or destroy __large values. null for trivial objects.
void* (*const __clone)(const void*);
void (*const __destroy)(void*);
// True if this is the null policy (no value).
const bool __is_null;
// The target type. May be null if RTTI is disabled.
const std::type_info* const __type_info;
// Returns a pointer to a static policy object suitable for the functor
// type.
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static const __policy* __create()
{
return __choose_policy<_Fun>(__use_small_storage<_Fun>());
}
_LIBCPP_INLINE_VISIBILITY
static const __policy* __create_empty()
{
static const _LIBCPP_CONSTEXPR __policy __policy_ = {nullptr, nullptr,
true,
#ifndef _LIBCPP_NO_RTTI
&typeid(void)
#else
nullptr
#endif
};
return &__policy_;
}
private:
template <typename _Fun> static void* __large_clone(const void* __s)
{
const _Fun* __f = static_cast<const _Fun*>(__s);
return __f->__clone();
}
template <typename _Fun>
static void __large_destroy(void* __s) {
_Fun::__destroy_and_delete(static_cast<_Fun*>(__s));
}
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static const __policy*
__choose_policy(/* is_small = */ false_type) {
static const _LIBCPP_CONSTEXPR __policy __policy_ = {
&__large_clone<_Fun>, &__large_destroy<_Fun>, false,
#ifndef _LIBCPP_NO_RTTI
&typeid(typename _Fun::_Target)
#else
nullptr
#endif
};
return &__policy_;
}
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static const __policy*
__choose_policy(/* is_small = */ true_type)
{
static const _LIBCPP_CONSTEXPR __policy __policy_ = {
nullptr, nullptr, false,
#ifndef _LIBCPP_NO_RTTI
&typeid(typename _Fun::_Target)
#else
nullptr
#endif
};
return &__policy_;
}
};
// Used to choose between perfect forwarding or pass-by-value. Pass-by-value is
// faster for types that can be passed in registers.
template <typename _Tp>
using __fast_forward =
typename conditional<is_scalar<_Tp>::value, _Tp, _Tp&&>::type;
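// Illustrative checks: scalars collapse to plain pass-by-value, class types
// to an rvalue reference.
static_assert(is_same<__fast_forward<int>, int>::value, "");
static_assert(is_same<__fast_forward<bad_function_call>, bad_function_call&&>::value, "");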
// __policy_invoker calls an instance of __alloc_func held in __policy_storage.
template <class _Fp> struct __policy_invoker;
template <class _Rp, class... _ArgTypes>
struct __policy_invoker<_Rp(_ArgTypes...)>
{
typedef _Rp (*__Call)(const __policy_storage*,
__fast_forward<_ArgTypes>...);
__Call __call_;
// Creates an invoker that throws bad_function_call.
_LIBCPP_INLINE_VISIBILITY
__policy_invoker() : __call_(&__call_empty) {}
// Creates an invoker that calls the given instance of __alloc_func or
// __default_alloc_func held in the __policy_storage.
template <typename _Fun>
_LIBCPP_INLINE_VISIBILITY static __policy_invoker __create()
{
return __policy_invoker(&__call_impl<_Fun>);
}
private:
_LIBCPP_INLINE_VISIBILITY
explicit __policy_invoker(__Call __c) : __call_(__c) {}
static _Rp __call_empty(const __policy_storage*,
__fast_forward<_ArgTypes>...)
{
__throw_bad_function_call();
}
template <typename _Fun>
static _Rp __call_impl(const __policy_storage* __buf,
__fast_forward<_ArgTypes>... __args)
{
_Fun* __f = reinterpret_cast<_Fun*>(__use_small_storage<_Fun>::value
? &__buf->__small
: __buf->__large);
return (*__f)(_VSTD::forward<_ArgTypes>(__args)...);
}
};
// __policy_func uses a __policy and __policy_invoker to create a type-erased,
// copyable functor.
template <class _Fp> class __policy_func;
template <class _Rp, class... _ArgTypes> class __policy_func<_Rp(_ArgTypes...)>
{
// Inline storage for small objects.
__policy_storage __buf_;
// Calls the value stored in __buf_. This could technically be part of
// policy, but storing it here eliminates a level of indirection inside
// operator().
typedef __function::__policy_invoker<_Rp(_ArgTypes...)> __invoker;
__invoker __invoker_;
// The policy that describes how to move / copy / destroy __buf_. Never
// null, even if the function is empty.
const __policy* __policy_;
public:
_LIBCPP_INLINE_VISIBILITY
__policy_func() : __policy_(__policy::__create_empty()) {}
template <class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY __policy_func(_Fp&& __f, const _Alloc& __a)
: __policy_(__policy::__create_empty())
{
typedef __alloc_func<_Fp, _Alloc, _Rp(_ArgTypes...)> _Fun;
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, _Fun>::type
_FunAlloc;
if (__function::__not_null(__f))
{
__invoker_ = __invoker::template __create<_Fun>();
__policy_ = __policy::__create<_Fun>();
_FunAlloc __af(__a);
if (__use_small_storage<_Fun>())
{
::new ((void*)&__buf_.__small)
_Fun(_VSTD::move(__f), _Alloc(__af));
}
else
{
typedef __allocator_destructor<_FunAlloc> _Dp;
unique_ptr<_Fun, _Dp> __hold(__af.allocate(1), _Dp(__af, 1));
::new ((void*)__hold.get())
_Fun(_VSTD::move(__f), _Alloc(__af));
__buf_.__large = __hold.release();
}
}
}
template <class _Fp, class = typename enable_if<!is_same<typename decay<_Fp>::type, __policy_func>::value>::type>
_LIBCPP_INLINE_VISIBILITY explicit __policy_func(_Fp&& __f)
: __policy_(__policy::__create_empty()) {
typedef __default_alloc_func<_Fp, _Rp(_ArgTypes...)> _Fun;
if (__function::__not_null(__f)) {
__invoker_ = __invoker::template __create<_Fun>();
__policy_ = __policy::__create<_Fun>();
if (__use_small_storage<_Fun>()) {
::new ((void*)&__buf_.__small) _Fun(_VSTD::move(__f));
} else {
__builtin_new_allocator::__holder_t __hold =
__builtin_new_allocator::__allocate_type<_Fun>(1);
__buf_.__large = ::new ((void*)__hold.get()) _Fun(_VSTD::move(__f));
(void)__hold.release();
}
}
}
_LIBCPP_INLINE_VISIBILITY
__policy_func(const __policy_func& __f)
: __buf_(__f.__buf_), __invoker_(__f.__invoker_),
__policy_(__f.__policy_)
{
if (__policy_->__clone)
__buf_.__large = __policy_->__clone(__f.__buf_.__large);
}
_LIBCPP_INLINE_VISIBILITY
__policy_func(__policy_func&& __f)
: __buf_(__f.__buf_), __invoker_(__f.__invoker_),
__policy_(__f.__policy_)
{
if (__policy_->__destroy)
{
__f.__policy_ = __policy::__create_empty();
__f.__invoker_ = __invoker();
}
}
_LIBCPP_INLINE_VISIBILITY
~__policy_func()
{
if (__policy_->__destroy)
__policy_->__destroy(__buf_.__large);
}
_LIBCPP_INLINE_VISIBILITY
__policy_func& operator=(__policy_func&& __f)
{
*this = nullptr;
__buf_ = __f.__buf_;
__invoker_ = __f.__invoker_;
__policy_ = __f.__policy_;
__f.__policy_ = __policy::__create_empty();
__f.__invoker_ = __invoker();
return *this;
}
_LIBCPP_INLINE_VISIBILITY
__policy_func& operator=(nullptr_t)
{
const __policy* __p = __policy_;
__policy_ = __policy::__create_empty();
__invoker_ = __invoker();
if (__p->__destroy)
__p->__destroy(__buf_.__large);
return *this;
}
_LIBCPP_INLINE_VISIBILITY
_Rp operator()(_ArgTypes&&... __args) const
{
return __invoker_.__call_(_VSTD::addressof(__buf_),
_VSTD::forward<_ArgTypes>(__args)...);
}
_LIBCPP_INLINE_VISIBILITY
void swap(__policy_func& __f)
{
_VSTD::swap(__invoker_, __f.__invoker_);
_VSTD::swap(__policy_, __f.__policy_);
_VSTD::swap(__buf_, __f.__buf_);
}
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const _NOEXCEPT
{
return !__policy_->__is_null;
}
#ifndef _LIBCPP_NO_RTTI
_LIBCPP_INLINE_VISIBILITY
const std::type_info& target_type() const _NOEXCEPT
{
return *__policy_->__type_info;
}
template <typename _Tp>
_LIBCPP_INLINE_VISIBILITY const _Tp* target() const _NOEXCEPT
{
if (__policy_->__is_null || typeid(_Tp) != *__policy_->__type_info)
return nullptr;
if (__policy_->__clone) // Out of line storage.
return reinterpret_cast<const _Tp*>(__buf_.__large);
else
return reinterpret_cast<const _Tp*>(&__buf_.__small);
}
#endif // _LIBCPP_NO_RTTI
};
-#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) && !defined(_LIBCPP_HAS_OBJC_ARC)
+#if defined(_LIBCPP_HAS_BLOCKS_RUNTIME)
extern "C" void *_Block_copy(const void *);
extern "C" void _Block_release(const void *);
template<class _Rp1, class ..._ArgTypes1, class _Alloc, class _Rp, class ..._ArgTypes>
class __func<_Rp1(^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)>
: public __base<_Rp(_ArgTypes...)>
{
typedef _Rp1(^__block_type)(_ArgTypes1...);
__block_type __f_;
public:
_LIBCPP_INLINE_VISIBILITY
explicit __func(__block_type const& __f)
+#ifdef _LIBCPP_HAS_OBJC_ARC
+ : __f_(__f)
+#else
: __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr))
+#endif
{ }
// [TODO] add && to save on a retain
_LIBCPP_INLINE_VISIBILITY
explicit __func(__block_type __f, const _Alloc& /* unused */)
+#ifdef _LIBCPP_HAS_OBJC_ARC
+ : __f_(__f)
+#else
: __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr))
+#endif
{ }
virtual __base<_Rp(_ArgTypes...)>* __clone() const {
_LIBCPP_ASSERT(false,
"Block pointers are just pointers, so they should always fit into "
"std::function's small buffer optimization. This function should "
"never be invoked.");
return nullptr;
}
virtual void __clone(__base<_Rp(_ArgTypes...)>* __p) const {
::new ((void*)__p) __func(__f_);
}
virtual void destroy() _NOEXCEPT {
+#ifndef _LIBCPP_HAS_OBJC_ARC
if (__f_)
_Block_release(__f_);
+#endif
__f_ = 0;
}
virtual void destroy_deallocate() _NOEXCEPT {
_LIBCPP_ASSERT(false,
"Block pointers are just pointers, so they should always fit into "
"std::function's small buffer optimization. This function should "
"never be invoked.");
}
virtual _Rp operator()(_ArgTypes&& ... __arg) {
return _VSTD::__invoke(__f_, _VSTD::forward<_ArgTypes>(__arg)...);
}
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(type_info const& __ti) const _NOEXCEPT {
if (__ti == typeid(__func::__block_type))
return &__f_;
return (const void*)nullptr;
}
virtual const std::type_info& target_type() const _NOEXCEPT {
return typeid(__func::__block_type);
}
#endif // _LIBCPP_NO_RTTI
};
-#endif // _LIBCPP_HAS_EXTENSION_BLOCKS && !_LIBCPP_HAS_OBJC_ARC
+#endif // _LIBCPP_HAS_BLOCKS_RUNTIME
} // namespace __function
template<class _Rp, class ..._ArgTypes>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)>
: public __function::__maybe_derive_from_unary_function<_Rp(_ArgTypes...)>,
public __function::__maybe_derive_from_binary_function<_Rp(_ArgTypes...)>
{
#ifndef _LIBCPP_ABI_OPTIMIZED_FUNCTION
typedef __function::__value_func<_Rp(_ArgTypes...)> __func;
#else
typedef __function::__policy_func<_Rp(_ArgTypes...)> __func;
#endif
__func __f_;
template <class _Fp, bool = _And<
_IsNotSame<__uncvref_t<_Fp>, function>,
__invokable<_Fp, _ArgTypes...>
>::value>
struct __callable;
template <class _Fp>
struct __callable<_Fp, true>
{
static const bool value = is_void<_Rp>::value ||
__is_core_convertible<typename __invoke_of<_Fp, _ArgTypes...>::type,
_Rp>::value;
};
template <class _Fp>
struct __callable<_Fp, false>
{
static const bool value = false;
};
template <class _Fp>
using _EnableIfLValueCallable = typename enable_if<__callable<_Fp&>::value>::type;
public:
typedef _Rp result_type;
// construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY
function() _NOEXCEPT { }
_LIBCPP_INLINE_VISIBILITY
function(nullptr_t) _NOEXCEPT {}
function(const function&);
function(function&&) _NOEXCEPT;
template<class _Fp, class = _EnableIfLValueCallable<_Fp>>
function(_Fp);
#if _LIBCPP_STD_VER <= 14
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) _NOEXCEPT {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) _NOEXCEPT {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, function&&);
template<class _Fp, class _Alloc, class = _EnableIfLValueCallable<_Fp>>
function(allocator_arg_t, const _Alloc& __a, _Fp __f);
#endif
function& operator=(const function&);
function& operator=(function&&) _NOEXCEPT;
function& operator=(nullptr_t) _NOEXCEPT;
template<class _Fp, class = _EnableIfLValueCallable<typename decay<_Fp>::type>>
function& operator=(_Fp&&);
~function();
// function modifiers:
void swap(function&) _NOEXCEPT;
#if _LIBCPP_STD_VER <= 14
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp&& __f, const _Alloc& __a)
{function(allocator_arg, __a, _VSTD::forward<_Fp>(__f)).swap(*this);}
#endif
// function capacity:
_LIBCPP_INLINE_VISIBILITY
explicit operator bool() const _NOEXCEPT {
return static_cast<bool>(__f_);
}
// deleted overloads close a possible hole in the type system
template<class _R2, class... _ArgTypes2>
bool operator==(const function<_R2(_ArgTypes2...)>&) const = delete;
template<class _R2, class... _ArgTypes2>
bool operator!=(const function<_R2(_ArgTypes2...)>&) const = delete;
public:
// function invocation:
_Rp operator()(_ArgTypes...) const;
#ifndef _LIBCPP_NO_RTTI
// function target access:
const std::type_info& target_type() const _NOEXCEPT;
template <typename _Tp> _Tp* target() _NOEXCEPT;
template <typename _Tp> const _Tp* target() const _NOEXCEPT;
#endif // _LIBCPP_NO_RTTI
};
#if _LIBCPP_STD_VER >= 17
template<class _Rp, class ..._Ap>
function(_Rp(*)(_Ap...)) -> function<_Rp(_Ap...)>;
template<class _Fp>
struct __strip_signature;
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...)> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile &> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) & noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const & noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) volatile & noexcept> { using type = _Rp(_Ap...); };
template<class _Rp, class _Gp, class ..._Ap>
struct __strip_signature<_Rp (_Gp::*) (_Ap...) const volatile & noexcept> { using type = _Rp(_Ap...); };
template<class _Fp, class _Stripped = typename __strip_signature<decltype(&_Fp::operator())>::type>
function(_Fp) -> function<_Stripped>;
#endif // _LIBCPP_STD_VER >= 17
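// What these guides deduce (sketch with hypothetical names):
//   int __plus(int, int);
//   function __f = &__plus;                       // function<int(int, int)>
//   auto __lam = [](double __d) { return __d; };
//   function __g = __lam;                         // function<double(double)>,
//   // via __strip_signature<decltype(&decltype(__lam)::operator())>, which
//   // peels the implicit `const` off the lambda's call operator.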
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>::function(const function& __f) : __f_(__f.__f_) {}
#if _LIBCPP_STD_VER <= 14
template<class _Rp, class ..._ArgTypes>
template <class _Alloc>
function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&,
const function& __f) : __f_(__f.__f_) {}
#endif
template <class _Rp, class... _ArgTypes>
function<_Rp(_ArgTypes...)>::function(function&& __f) _NOEXCEPT
: __f_(_VSTD::move(__f.__f_)) {}
#if _LIBCPP_STD_VER <= 14
template<class _Rp, class ..._ArgTypes>
template <class _Alloc>
function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc&,
function&& __f)
: __f_(_VSTD::move(__f.__f_)) {}
#endif
template <class _Rp, class... _ArgTypes>
template <class _Fp, class>
function<_Rp(_ArgTypes...)>::function(_Fp __f) : __f_(_VSTD::move(__f)) {}
#if _LIBCPP_STD_VER <= 14
template <class _Rp, class... _ArgTypes>
template <class _Fp, class _Alloc, class>
function<_Rp(_ArgTypes...)>::function(allocator_arg_t, const _Alloc& __a,
_Fp __f)
: __f_(_VSTD::move(__f), __a) {}
#endif
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(const function& __f)
{
function(__f).swap(*this);
return *this;
}
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(function&& __f) _NOEXCEPT
{
__f_ = _VSTD::move(__f.__f_);
return *this;
}
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(nullptr_t) _NOEXCEPT
{
__f_ = nullptr;
return *this;
}
template<class _Rp, class ..._ArgTypes>
template <class _Fp, class>
function<_Rp(_ArgTypes...)>&
function<_Rp(_ArgTypes...)>::operator=(_Fp&& __f)
{
function(_VSTD::forward<_Fp>(__f)).swap(*this);
return *this;
}
template<class _Rp, class ..._ArgTypes>
function<_Rp(_ArgTypes...)>::~function() {}
template<class _Rp, class ..._ArgTypes>
void
function<_Rp(_ArgTypes...)>::swap(function& __f) _NOEXCEPT
{
__f_.swap(__f.__f_);
}
template<class _Rp, class ..._ArgTypes>
_Rp
function<_Rp(_ArgTypes...)>::operator()(_ArgTypes... __arg) const
{
return __f_(_VSTD::forward<_ArgTypes>(__arg)...);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class ..._ArgTypes>
const std::type_info&
function<_Rp(_ArgTypes...)>::target_type() const _NOEXCEPT
{
return __f_.target_type();
}
template<class _Rp, class ..._ArgTypes>
template <typename _Tp>
_Tp*
function<_Rp(_ArgTypes...)>::target() _NOEXCEPT
{
return (_Tp*)(__f_.template target<_Tp>());
}
template<class _Rp, class ..._ArgTypes>
template <typename _Tp>
const _Tp*
function<_Rp(_ArgTypes...)>::target() const _NOEXCEPT
{
return __f_.template target<_Tp>();
}
#endif // _LIBCPP_NO_RTTI
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return !__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return !__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT {return (bool)__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(nullptr_t, const function<_Rp(_ArgTypes...)>& __f) _NOEXCEPT {return (bool)__f;}
template <class _Rp, class... _ArgTypes>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(function<_Rp(_ArgTypes...)>& __x, function<_Rp(_ArgTypes...)>& __y) _NOEXCEPT
{return __x.swap(__y);}
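// Usage (sketch): the overloads above make emptiness checks read naturally.
//   function<void()> __task;
//   bool __is_empty = (__task == nullptr); // true until a target is installed
//   __task = [] {};
//   bool __engaged = (__task != nullptr);  // now true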
#elif defined(_LIBCPP_ENABLE_CXX03_FUNCTION)
namespace __function {
template<class _Fp> class __base;
template<class _Rp>
class __base<_Rp()>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()() = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0>
class __base<_Rp(_A0)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()(_A0) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1>
class __base<_Rp(_A0, _A1)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()(_A0, _A1) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1, class _A2>
class __base<_Rp(_A0, _A1, _A2)>
{
__base(const __base&);
__base& operator=(const __base&);
public:
__base() {}
virtual ~__base() {}
virtual __base* __clone() const = 0;
virtual void __clone(__base*) const = 0;
virtual void destroy() = 0;
virtual void destroy_deallocate() = 0;
virtual _Rp operator()(_A0, _A1, _A2) = 0;
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const = 0;
virtual const std::type_info& target_type() const = 0;
#endif // _LIBCPP_NO_RTTI
};
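// __base<_Rp(_A0...)> is the type-erased interface: callers only see the
// virtual call, clone, and destroy operations. __func (below) is the one
// concrete implementation, parameterized on the stored callable _Fp and
// its allocator, held together in a __compressed_pair. __clone() makes a
// heap-allocated copy, while __clone(__base*) placement-constructs a copy
// into caller-provided storage (used for the small-buffer case).
// destroy() runs only the destructor; destroy_deallocate() also returns
// the memory to the allocator.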
template<class _FD, class _Alloc, class _FB> class __func;
template<class _Fp, class _Alloc, class _Rp>
class __func<_Fp, _Alloc, _Rp()>
: public __base<_Rp()>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
explicit __func(_Fp __f, _Alloc __a) : __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp()>* __clone() const;
virtual void __clone(__base<_Rp()>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()();
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp>
__base<_Rp()>*
__func<_Fp, _Alloc, _Rp()>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
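// Note the exception-safety pattern above: the freshly allocated storage
// is owned by a unique_ptr whose __allocator_destructor deleter returns
// it to the allocator if the placement-new of the copied callable throws;
// release() is reached only after construction succeeds.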
template<class _Fp, class _Alloc, class _Rp>
void
__func<_Fp, _Alloc, _Rp()>::__clone(__base<_Rp()>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp>
void
__func<_Fp, _Alloc, _Rp()>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp>
void
__func<_Fp, _Alloc, _Rp()>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp>
_Rp
__func<_Fp, _Alloc, _Rp()>::operator()()
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first());
}
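// __invoke_void_return_wrapper lets one definition serve every _Rp: when
// _Rp is void it discards the callable's return value, and otherwise it
// forwards the result unchanged.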
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp>
const void*
__func<_Fp, _Alloc, _Rp()>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return _VSTD::addressof(__f_.first());
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp>
const std::type_info&
__func<_Fp, _Alloc, _Rp()>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0>
class __func<_Fp, _Alloc, _Rp(_A0)>
: public __base<_Rp(_A0)>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f, _Alloc __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_A0)>* __clone() const;
virtual void __clone(__base<_Rp(_A0)>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()(_A0);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class _A0>
__base<_Rp(_A0)>*
__func<_Fp, _Alloc, _Rp(_A0)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
void
__func<_Fp, _Alloc, _Rp(_A0)>::__clone(__base<_Rp(_A0)>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
void
__func<_Fp, _Alloc, _Rp(_A0)>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
void
__func<_Fp, _Alloc, _Rp(_A0)>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
_Rp
__func<_Fp, _Alloc, _Rp(_A0)>::operator()(_A0 __a0)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(), __a0);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0>
const void*
__func<_Fp, _Alloc, _Rp(_A0)>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return &__f_.first();
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp, class _A0>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_A0)>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
class __func<_Fp, _Alloc, _Rp(_A0, _A1)>
: public __base<_Rp(_A0, _A1)>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f, _Alloc __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_A0, _A1)>* __clone() const;
virtual void __clone(__base<_Rp(_A0, _A1)>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()(_A0, _A1);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
__base<_Rp(_A0, _A1)>*
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::__clone(__base<_Rp(_A0, _A1)>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
_Rp
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::operator()(_A0 __a0, _A1 __a1)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(), __a0, __a1);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
const void*
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return &__f_.first();
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_A0, _A1)>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
class __func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>
: public __base<_Rp(_A0, _A1, _A2)>
{
__compressed_pair<_Fp, _Alloc> __f_;
public:
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f) : __f_(_VSTD::move(__f), __default_init_tag()) {}
_LIBCPP_INLINE_VISIBILITY explicit __func(_Fp __f, _Alloc __a)
: __f_(_VSTD::move(__f), _VSTD::move(__a)) {}
virtual __base<_Rp(_A0, _A1, _A2)>* __clone() const;
virtual void __clone(__base<_Rp(_A0, _A1, _A2)>*) const;
virtual void destroy();
virtual void destroy_deallocate();
virtual _Rp operator()(_A0, _A1, _A2);
#ifndef _LIBCPP_NO_RTTI
virtual const void* target(const type_info&) const;
virtual const std::type_info& target_type() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
__base<_Rp(_A0, _A1, _A2)>*
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::__clone() const
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a));
return __hold.release();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::__clone(__base<_Rp(_A0, _A1, _A2)>* __p) const
{
::new ((void*)__p) __func(__f_.first(), __f_.second());
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::destroy()
{
__f_.~__compressed_pair<_Fp, _Alloc>();
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
void
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::destroy_deallocate()
{
typedef allocator_traits<_Alloc> __alloc_traits;
typedef typename __rebind_alloc_helper<__alloc_traits, __func>::type _Ap;
_Ap __a(__f_.second());
__f_.~__compressed_pair<_Fp, _Alloc>();
__a.deallocate(this, 1);
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
_Rp
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::operator()(_A0 __a0, _A1 __a1, _A2 __a2)
{
typedef __invoke_void_return_wrapper<_Rp> _Invoker;
return _Invoker::__call(__f_.first(), __a0, __a1, __a2);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
const void*
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::target(const type_info& __ti) const
{
if (__ti == typeid(_Fp))
return &__f_.first();
return (const void*)0;
}
template<class _Fp, class _Alloc, class _Rp, class _A0, class _A1, class _A2>
const std::type_info&
__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::target_type() const
{
return typeid(_Fp);
}
#endif // _LIBCPP_NO_RTTI
} // namespace __function
template<class _Rp>
class _LIBCPP_TEMPLATE_VIS function<_Rp()>
{
typedef __function::__base<_Rp()> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2>
bool operator==(const function<_R2()>&) const = delete;
template<class _R2>
bool operator!=(const function<_R2()>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()() const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp>
function<_Rp()>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp>
template<class _Alloc>
function<_Rp()>::function(allocator_arg_t, const _Alloc&, const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp>
template <class _Fp>
function<_Rp()>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp()> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
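// The constructor above implements the small-object optimization: when
// the concrete wrapper _FF (vtable pointer plus the stored callable) fits
// into the three-pointer buffer __buf_, it is placement-constructed in
// place and no allocation happens; larger callables are allocated on the
// heap through allocator<_FF>. A hypothetical sketch of the two cases:
//
//   struct Small { void operator()() const {} };        // stored inline
//   struct Big { char payload[4 * sizeof(void*)];
//                void operator()() const {} };          // heap-allocated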
template<class _Rp>
template <class _Fp, class _Alloc>
function<_Rp()>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp()> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp>
function<_Rp()>&
function<_Rp()>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp>
function<_Rp()>&
function<_Rp()>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp()>&
>::type
function<_Rp()>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp>
function<_Rp()>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp>
void
function<_Rp()>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
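// The swap above distinguishes three cases because objects stored in the
// small buffer cannot have their pointers exchanged:
//  1. both targets are inline: clone through a temporary buffer on the
//     stack, destroying each source as it is copied out;
//  2. exactly one target is inline: clone it into the other function's
//     buffer and take over the other's heap pointer;
//  3. both targets are on the heap: a plain pointer swap suffices.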
template<class _Rp>
_Rp
function<_Rp()>::operator()() const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)();
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp>
const std::type_info&
function<_Rp()>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp>
template <typename _Tp>
_Tp*
function<_Rp()>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp>
template <typename _Tp>
const _Tp*
function<_Rp()>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
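// Illustrative use of the RTTI-based target access above (Adder is a
// hypothetical callable, not part of the library):
//
//   struct Adder { int operator()(int x) const { return x + 1; } };
//   function<int(int)> f = Adder();
//   assert(f.target_type() == typeid(Adder));
//   Adder* p = f.target<Adder>();          // non-null: types match
//   assert(f.target<int(*)(int)>() == 0);  // null: wrong target type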
template<class _Rp, class _A0>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_A0)>
: public unary_function<_A0, _Rp>
{
typedef __function::__base<_Rp(_A0)> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2, class _B0>
bool operator==(const function<_R2(_B0)>&) const = delete;
template<class _R2, class _B0>
bool operator!=(const function<_R2(_B0)>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()(_A0) const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0>
function<_Rp(_A0)>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0>
template<class _Alloc>
function<_Rp(_A0)>::function(allocator_arg_t, const _Alloc&, const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0>
template <class _Fp>
function<_Rp(_A0)>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp(_A0)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0>
template <class _Fp, class _Alloc>
function<_Rp(_A0)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp(_A0)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0>
function<_Rp(_A0)>&
function<_Rp(_A0)>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp, class _A0>
function<_Rp(_A0)>&
function<_Rp(_A0)>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp, class _A0>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp(_A0)>&
>::type
function<_Rp(_A0)>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp, class _A0>
function<_Rp(_A0)>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp, class _A0>
void
function<_Rp(_A0)>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
template<class _Rp, class _A0>
_Rp
function<_Rp(_A0)>::operator()(_A0 __a0) const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)(__a0);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class _A0>
const std::type_info&
function<_Rp(_A0)>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp, class _A0>
template <typename _Tp>
_Tp*
function<_Rp(_A0)>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp, class _A0>
template <typename _Tp>
const _Tp*
function<_Rp(_A0)>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_A0, _A1)>
: public binary_function<_A0, _A1, _Rp>
{
typedef __function::__base<_Rp(_A0, _A1)> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2, class _B0, class _B1>
bool operator==(const function<_R2(_B0, _B1)>&) const = delete;
template<class _R2, class _B0, class _B1>
bool operator!=(const function<_R2(_B0, _B1)>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()(_A0, _A1) const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1>
template<class _Alloc>
function<_Rp(_A0, _A1)>::function(allocator_arg_t, const _Alloc&, const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1>
template <class _Fp>
function<_Rp(_A0, _A1)>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp(_A0, _A1)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1>
template <class _Fp, class _Alloc>
function<_Rp(_A0, _A1)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp(_A0, _A1)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>&
function<_Rp(_A0, _A1)>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>&
function<_Rp(_A0, _A1)>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp, class _A0, class _A1>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp(_A0, _A1)>&
>::type
function<_Rp(_A0, _A1)>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp, class _A0, class _A1>
function<_Rp(_A0, _A1)>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp, class _A0, class _A1>
void
function<_Rp(_A0, _A1)>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
template<class _Rp, class _A0, class _A1>
_Rp
function<_Rp(_A0, _A1)>::operator()(_A0 __a0, _A1 __a1) const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)(__a0, __a1);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1>
const std::type_info&
function<_Rp(_A0, _A1)>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp, class _A0, class _A1>
template <typename _Tp>
_Tp*
function<_Rp(_A0, _A1)>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp, class _A0, class _A1>
template <typename _Tp>
const _Tp*
function<_Rp(_A0, _A1)>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1, class _A2>
class _LIBCPP_TEMPLATE_VIS function<_Rp(_A0, _A1, _A2)>
{
typedef __function::__base<_Rp(_A0, _A1, _A2)> __base;
aligned_storage<3*sizeof(void*)>::type __buf_;
__base* __f_;
public:
typedef _Rp result_type;
// 20.7.16.2.1, construct/copy/destroy:
_LIBCPP_INLINE_VISIBILITY explicit function() : __f_(0) {}
_LIBCPP_INLINE_VISIBILITY function(nullptr_t) : __f_(0) {}
function(const function&);
template<class _Fp>
function(_Fp,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&) : __f_(0) {}
template<class _Alloc>
_LIBCPP_INLINE_VISIBILITY
function(allocator_arg_t, const _Alloc&, nullptr_t) : __f_(0) {}
template<class _Alloc>
function(allocator_arg_t, const _Alloc&, const function&);
template<class _Fp, class _Alloc>
function(allocator_arg_t, const _Alloc& __a, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type* = 0);
function& operator=(const function&);
function& operator=(nullptr_t);
template<class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function&
>::type
operator=(_Fp);
~function();
// 20.7.16.2.2, function modifiers:
void swap(function&);
template<class _Fp, class _Alloc>
_LIBCPP_INLINE_VISIBILITY
void assign(_Fp __f, const _Alloc& __a)
{function(allocator_arg, __a, __f).swap(*this);}
// 20.7.16.2.3, function capacity:
_LIBCPP_INLINE_VISIBILITY explicit operator bool() const {return __f_;}
template<class _R2, class _B0, class _B1, class _B2>
bool operator==(const function<_R2(_B0, _B1, _B2)>&) const = delete;
template<class _R2, class _B0, class _B1, class _B2>
bool operator!=(const function<_R2(_B0, _B1, _B2)>&) const = delete;
// 20.7.16.2.4, function invocation:
_Rp operator()(_A0, _A1, _A2) const;
#ifndef _LIBCPP_NO_RTTI
// 20.7.16.2.5, function target access:
const std::type_info& target_type() const;
template <typename _Tp> _Tp* target();
template <typename _Tp> const _Tp* target() const;
#endif // _LIBCPP_NO_RTTI
};
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>::function(const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1, class _A2>
template<class _Alloc>
function<_Rp(_A0, _A1, _A2)>::function(allocator_arg_t, const _Alloc&,
const function& __f)
{
if (__f.__f_ == 0)
__f_ = 0;
else if (__f.__f_ == (const __base*)&__f.__buf_)
{
__f_ = (__base*)&__buf_;
__f.__f_->__clone(__f_);
}
else
__f_ = __f.__f_->__clone();
}
template<class _Rp, class _A0, class _A1, class _A2>
template <class _Fp>
function<_Rp(_A0, _A1, _A2)>::function(_Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, allocator<_Fp>, _Rp(_A0, _A1, _A2)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f);
}
else
{
typedef allocator<_FF> _Ap;
_Ap __a;
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1, class _A2>
template <class _Fp, class _Alloc>
function<_Rp(_A0, _A1, _A2)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f,
typename enable_if<!is_integral<_Fp>::value>::type*)
: __f_(0)
{
typedef allocator_traits<_Alloc> __alloc_traits;
if (__function::__not_null(__f))
{
typedef __function::__func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)> _FF;
if (sizeof(_FF) <= sizeof(__buf_))
{
__f_ = (__base*)&__buf_;
::new ((void*)__f_) _FF(__f, __a0);
}
else
{
typedef typename __rebind_alloc_helper<__alloc_traits, _FF>::type _Ap;
_Ap __a(__a0);
typedef __allocator_destructor<_Ap> _Dp;
unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1));
::new ((void*)__hold.get()) _FF(__f, _Alloc(__a));
__f_ = __hold.release();
}
}
}
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>&
function<_Rp(_A0, _A1, _A2)>::operator=(const function& __f)
{
if (__f)
function(__f).swap(*this);
else
*this = nullptr;
return *this;
}
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>&
function<_Rp(_A0, _A1, _A2)>::operator=(nullptr_t)
{
__base* __t = __f_;
__f_ = 0;
if (__t == (__base*)&__buf_)
__t->destroy();
else if (__t)
__t->destroy_deallocate();
return *this;
}
template<class _Rp, class _A0, class _A1, class _A2>
template <class _Fp>
typename enable_if
<
!is_integral<_Fp>::value,
function<_Rp(_A0, _A1, _A2)>&
>::type
function<_Rp(_A0, _A1, _A2)>::operator=(_Fp __f)
{
function(_VSTD::move(__f)).swap(*this);
return *this;
}
template<class _Rp, class _A0, class _A1, class _A2>
function<_Rp(_A0, _A1, _A2)>::~function()
{
if (__f_ == (__base*)&__buf_)
__f_->destroy();
else if (__f_)
__f_->destroy_deallocate();
}
template<class _Rp, class _A0, class _A1, class _A2>
void
function<_Rp(_A0, _A1, _A2)>::swap(function& __f)
{
if (_VSTD::addressof(__f) == this)
return;
if (__f_ == (__base*)&__buf_ && __f.__f_ == (__base*)&__f.__buf_)
{
typename aligned_storage<sizeof(__buf_)>::type __tempbuf;
__base* __t = (__base*)&__tempbuf;
__f_->__clone(__t);
__f_->destroy();
__f_ = 0;
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = 0;
__f_ = (__base*)&__buf_;
__t->__clone((__base*)&__f.__buf_);
__t->destroy();
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f_ == (__base*)&__buf_)
{
__f_->__clone((__base*)&__f.__buf_);
__f_->destroy();
__f_ = __f.__f_;
__f.__f_ = (__base*)&__f.__buf_;
}
else if (__f.__f_ == (__base*)&__f.__buf_)
{
__f.__f_->__clone((__base*)&__buf_);
__f.__f_->destroy();
__f.__f_ = __f_;
__f_ = (__base*)&__buf_;
}
else
_VSTD::swap(__f_, __f.__f_);
}
template<class _Rp, class _A0, class _A1, class _A2>
_Rp
function<_Rp(_A0, _A1, _A2)>::operator()(_A0 __a0, _A1 __a1, _A2 __a2) const
{
if (__f_ == 0)
__throw_bad_function_call();
return (*__f_)(__a0, __a1, __a2);
}
#ifndef _LIBCPP_NO_RTTI
template<class _Rp, class _A0, class _A1, class _A2>
const std::type_info&
function<_Rp(_A0, _A1, _A2)>::target_type() const
{
if (__f_ == 0)
return typeid(void);
return __f_->target_type();
}
template<class _Rp, class _A0, class _A1, class _A2>
template <typename _Tp>
_Tp*
function<_Rp(_A0, _A1, _A2)>::target()
{
if (__f_ == 0)
return (_Tp*)0;
return (_Tp*) const_cast<void *>(__f_->target(typeid(_Tp)));
}
template<class _Rp, class _A0, class _A1, class _A2>
template <typename _Tp>
const _Tp*
function<_Rp(_A0, _A1, _A2)>::target() const
{
if (__f_ == 0)
return (const _Tp*)0;
return (const _Tp*)__f_->target(typeid(_Tp));
}
#endif // _LIBCPP_NO_RTTI
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(const function<_Fp>& __f, nullptr_t) {return !__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator==(nullptr_t, const function<_Fp>& __f) {return !__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(const function<_Fp>& __f, nullptr_t) {return (bool)__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
bool
operator!=(nullptr_t, const function<_Fp>& __f) {return (bool)__f;}
template <class _Fp>
inline _LIBCPP_INLINE_VISIBILITY
void
swap(function<_Fp>& __x, function<_Fp>& __y)
{return __x.swap(__y);}
#endif // _LIBCPP_CXX03_LANG
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP___FUNCTIONAL_FUNCTION_H
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 296fb4220012..58d863776430 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1,2800 +1,2803 @@
//===- Driver.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The driver drives the entire linking process. It is responsible for
// parsing command line options and doing whatever it is instructed to do.
//
// One notable difference between LLD's driver and other linkers is
// that LLD's driver is agnostic about the host operating system.
// Other linkers usually have implicit default values (such as a dynamic
// linker path or library paths) for each host OS.
//
// I don't think implicit default values are useful because they are
// usually explicitly specified by the compiler driver. They can even
// be harmful when you are doing cross-linking. Therefore, in LLD, we
// simply trust the compiler driver to pass all required options and
// make no effort to guess them on our side.
//
//===----------------------------------------------------------------------===//
#include "Driver.h"
#include "Config.h"
#include "ICF.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "MarkLive.h"
#include "OutputSections.h"
#include "ScriptParser.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/Args.h"
#include "lld/Common/Driver.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Filesystem.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Strings.h"
#include "lld/Common/TargetOptionsCommandFlags.h"
#include "lld/Common/Version.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/Archive.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
#include <utility>
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace llvm::support;
using namespace lld;
using namespace lld::elf;
std::unique_ptr<Configuration> elf::config;
std::unique_ptr<Ctx> elf::ctx;
std::unique_ptr<LinkerDriver> elf::driver;
static void setConfigs(opt::InputArgList &args);
static void readConfigs(opt::InputArgList &args);
void elf::errorOrWarn(const Twine &msg) {
if (config->noinhibitExec)
warn(msg);
else
error(msg);
}
bool elf::link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
llvm::raw_ostream &stderrOS, bool exitEarly,
bool disableOutput) {
// This driver-specific context will be freed later by lldMain().
auto *ctx = new CommonLinkerContext;
ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
ctx->e.cleanupCallback = []() {
inputSections.clear();
outputSections.clear();
symAux.clear();
tar = nullptr;
in.reset();
partitions.clear();
partitions.emplace_back();
SharedFile::vernauxNum = 0;
};
ctx->e.logName = args::getFilenameWithoutExe(args[0]);
ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now (use "
"--error-limit=0 to see all errors)";
config = std::make_unique<Configuration>();
elf::ctx = std::make_unique<Ctx>();
driver = std::make_unique<LinkerDriver>();
script = std::make_unique<LinkerScript>();
symtab = std::make_unique<SymbolTable>();
partitions.clear();
partitions.emplace_back();
config->progName = args[0];
driver->linkerMain(args);
return errorCount() == 0;
}
// Parses a linker -m option.
static std::tuple<ELFKind, uint16_t, uint8_t> parseEmulation(StringRef emul) {
uint8_t osabi = 0;
StringRef s = emul;
if (s.endswith("_fbsd")) {
s = s.drop_back(5);
osabi = ELFOSABI_FREEBSD;
}
std::pair<ELFKind, uint16_t> ret =
StringSwitch<std::pair<ELFKind, uint16_t>>(s)
.Cases("aarch64elf", "aarch64linux", {ELF64LEKind, EM_AARCH64})
.Cases("aarch64elfb", "aarch64linuxb", {ELF64BEKind, EM_AARCH64})
.Cases("armelf", "armelf_linux_eabi", {ELF32LEKind, EM_ARM})
.Case("elf32_x86_64", {ELF32LEKind, EM_X86_64})
.Cases("elf32btsmip", "elf32btsmipn32", {ELF32BEKind, EM_MIPS})
.Cases("elf32ltsmip", "elf32ltsmipn32", {ELF32LEKind, EM_MIPS})
.Case("elf32lriscv", {ELF32LEKind, EM_RISCV})
.Cases("elf32ppc", "elf32ppclinux", {ELF32BEKind, EM_PPC})
.Cases("elf32lppc", "elf32lppclinux", {ELF32LEKind, EM_PPC})
.Case("elf64btsmip", {ELF64BEKind, EM_MIPS})
.Case("elf64ltsmip", {ELF64LEKind, EM_MIPS})
.Case("elf64lriscv", {ELF64LEKind, EM_RISCV})
.Case("elf64ppc", {ELF64BEKind, EM_PPC64})
.Case("elf64lppc", {ELF64LEKind, EM_PPC64})
.Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64})
.Case("elf_i386", {ELF32LEKind, EM_386})
.Case("elf_iamcu", {ELF32LEKind, EM_IAMCU})
.Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9})
.Case("msp430elf", {ELF32LEKind, EM_MSP430})
.Default({ELFNoneKind, EM_NONE});
if (ret.first == ELFNoneKind)
error("unknown emulation: " + emul);
if (ret.second == EM_MSP430)
osabi = ELFOSABI_STANDALONE;
return std::make_tuple(ret.first, ret.second, osabi);
}
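// For example (following the table above):
//   -m elf_x86_64    -> (ELF64LEKind, EM_X86_64, osabi 0)
//   -m elf_i386_fbsd -> (ELF32LEKind, EM_386, ELFOSABI_FREEBSD)
//   -m msp430elf     -> (ELF32LEKind, EM_MSP430, ELFOSABI_STANDALONE)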
// Returns slices of MB by parsing MB as an archive file.
// Each slice consists of a member file in the archive.
std::vector<std::pair<MemoryBufferRef, uint64_t>> static getArchiveMembers(
MemoryBufferRef mb) {
std::unique_ptr<Archive> file =
CHECK(Archive::create(mb),
mb.getBufferIdentifier() + ": failed to parse archive");
std::vector<std::pair<MemoryBufferRef, uint64_t>> v;
Error err = Error::success();
bool addToTar = file->isThin() && tar;
for (const Archive::Child &c : file->children(err)) {
MemoryBufferRef mbref =
CHECK(c.getMemoryBufferRef(),
mb.getBufferIdentifier() +
": could not get the buffer for a child of the archive");
if (addToTar)
tar->append(relativeToRoot(check(c.getFullName())), mbref.getBuffer());
v.push_back(std::make_pair(mbref, c.getChildOffset()));
}
if (err)
fatal(mb.getBufferIdentifier() + ": Archive::children failed: " +
toString(std::move(err)));
// Take ownership of memory buffers created for members of thin archives.
std::vector<std::unique_ptr<MemoryBuffer>> mbs = file->takeThinBuffers();
std::move(mbs.begin(), mbs.end(), std::back_inserter(ctx->memoryBuffers));
return v;
}
static bool isBitcode(MemoryBufferRef mb) {
return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
}
// Opens a file and creates a file object. The path has to be resolved already.
void LinkerDriver::addFile(StringRef path, bool withLOption) {
using namespace sys::fs;
Optional<MemoryBufferRef> buffer = readFile(path);
if (!buffer)
return;
MemoryBufferRef mbref = *buffer;
if (config->formatBinary) {
files.push_back(make<BinaryFile>(mbref));
return;
}
switch (identify_magic(mbref.getBuffer())) {
case file_magic::unknown:
readLinkerScript(mbref);
return;
case file_magic::archive: {
if (inWholeArchive) {
for (const auto &p : getArchiveMembers(mbref)) {
if (isBitcode(p.first))
files.push_back(make<BitcodeFile>(p.first, path, p.second, false));
else
files.push_back(createObjFile(p.first, path));
}
return;
}
auto members = getArchiveMembers(mbref);
archiveFiles.emplace_back(path, members.size());
// Handle archives and --start-lib/--end-lib using the same code path. This
// scans all the ELF relocatable object files and bitcode files in the
// archive rather than just the index file, with the benefit that the
// symbols are only loaded once. For many projects, archives see high
// utilization rates, and this is a net performance win. --start-lib scans
// symbols in the same order that llvm-ar adds them to the index, so in the
// common case the semantics are identical. If the archive symbol table was
// created in a different order, or is incomplete, this strategy has
// different semantics. Such output differences are considered user error.
//
// All files within the archive get the same group ID to allow mutual
// references for --warn-backrefs.
bool saved = InputFile::isInGroup;
InputFile::isInGroup = true;
for (const std::pair<MemoryBufferRef, uint64_t> &p : members) {
auto magic = identify_magic(p.first.getBuffer());
if (magic == file_magic::elf_relocatable)
files.push_back(createObjFile(p.first, path, true));
else if (magic == file_magic::bitcode)
files.push_back(make<BitcodeFile>(p.first, path, p.second, true));
else
warn(path + ": archive member '" + p.first.getBufferIdentifier() +
"' is neither ET_REL nor LLVM bitcode");
}
InputFile::isInGroup = saved;
if (!saved)
++InputFile::nextGroupId;
return;
}
case file_magic::elf_shared_object:
if (config->isStatic || config->relocatable) {
error("attempted static link of dynamic object " + path);
return;
}
// Shared objects are identified by soname. soname is (if specified)
// DT_SONAME and falls back to filename. If a file was specified by -lfoo,
// the directory part is ignored. Note that path may be a temporary and
// cannot be stored into SharedFile::soName.
path = mbref.getBufferIdentifier();
files.push_back(
make<SharedFile>(mbref, withLOption ? path::filename(path) : path));
return;
case file_magic::bitcode:
files.push_back(make<BitcodeFile>(mbref, "", 0, inLib));
break;
case file_magic::elf_relocatable:
files.push_back(createObjFile(mbref, "", inLib));
break;
default:
error(path + ": unknown file type");
}
}
// Add a given library by searching it from input search paths.
void LinkerDriver::addLibrary(StringRef name) {
if (Optional<std::string> path = searchLibrary(name))
addFile(saver().save(*path), /*withLOption=*/true);
else
error("unable to find library -l" + name, ErrorTag::LibNotFound, {name});
}
// This function is called on startup. We need this for LTO since
// LTO calls LLVM functions to compile bitcode files to native code.
// Technically this can be delayed until we read bitcode files, but
// we don't bother to do it lazily because the initialization is fast.
static void initLLVM() {
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
}
// Some command line options or some combinations of them are not allowed.
// This function checks for such errors.
static void checkOptions() {
// The MIPS ABI as of 2016 does not support the GNU-style symbol lookup
// table which is a relatively new feature.
if (config->emachine == EM_MIPS && config->gnuHash)
error("the .gnu.hash section is not compatible with the MIPS target");
if (config->fixCortexA53Errata843419 && config->emachine != EM_AARCH64)
error("--fix-cortex-a53-843419 is only supported on AArch64 targets");
if (config->fixCortexA8 && config->emachine != EM_ARM)
error("--fix-cortex-a8 is only supported on ARM targets");
if (config->tocOptimize && config->emachine != EM_PPC64)
error("--toc-optimize is only supported on PowerPC64 targets");
if (config->pcRelOptimize && config->emachine != EM_PPC64)
error("--pcrel-optimize is only supported on PowerPC64 targets");
if (config->pie && config->shared)
error("-shared and -pie may not be used together");
if (!config->shared && !config->filterList.empty())
error("-F may not be used without -shared");
if (!config->shared && !config->auxiliaryList.empty())
error("-f may not be used without -shared");
if (config->strip == StripPolicy::All && config->emitRelocs)
error("--strip-all and --emit-relocs may not be used together");
if (config->zText && config->zIfuncNoplt)
error("-z text and -z ifunc-noplt may not be used together");
if (config->relocatable) {
if (config->shared)
error("-r and -shared may not be used together");
if (config->gdbIndex)
error("-r and --gdb-index may not be used together");
if (config->icf != ICFLevel::None)
error("-r and --icf may not be used together");
if (config->pie)
error("-r and -pie may not be used together");
if (config->exportDynamic)
error("-r and --export-dynamic may not be used together");
}
if (config->executeOnly) {
if (config->emachine != EM_AARCH64)
error("--execute-only is only supported on AArch64 targets");
if (config->singleRoRx && !script->hasSectionsCommand)
error("--execute-only and --no-rosegment cannot be used together");
}
if (config->zRetpolineplt && config->zForceIbt)
error("-z force-ibt may not be used with -z retpolineplt");
if (config->emachine != EM_AARCH64) {
if (config->zPacPlt)
error("-z pac-plt only supported on AArch64");
if (config->zForceBti)
error("-z force-bti only supported on AArch64");
if (config->zBtiReport != "none")
error("-z bti-report only supported on AArch64");
}
if (config->emachine != EM_386 && config->emachine != EM_X86_64 &&
config->zCetReport != "none")
error("-z cet-report only supported on X86 and X86_64");
}
static const char *getReproduceOption(opt::InputArgList &args) {
if (auto *arg = args.getLastArg(OPT_reproduce))
return arg->getValue();
return getenv("LLD_REPRODUCE");
}
static bool hasZOption(opt::InputArgList &args, StringRef key) {
for (auto *arg : args.filtered(OPT_z))
if (key == arg->getValue())
return true;
return false;
}
static bool getZFlag(opt::InputArgList &args, StringRef k1, StringRef k2,
bool Default) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
if (k1 == arg->getValue())
return true;
if (k2 == arg->getValue())
return false;
}
return Default;
}
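// For example, getZFlag(args, "text", "notext", false) walks the -z
// values from last to first, so "-z text -z notext" returns false (the
// later "notext" wins) and a command line mentioning neither returns the
// default.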
static SeparateSegmentKind getZSeparate(opt::InputArgList &args) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
StringRef v = arg->getValue();
if (v == "noseparate-code")
return SeparateSegmentKind::None;
if (v == "separate-code")
return SeparateSegmentKind::Code;
if (v == "separate-loadable-segments")
return SeparateSegmentKind::Loadable;
}
return SeparateSegmentKind::None;
}
static GnuStackKind getZGnuStack(opt::InputArgList &args) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
if (StringRef("execstack") == arg->getValue())
return GnuStackKind::Exec;
if (StringRef("noexecstack") == arg->getValue())
return GnuStackKind::NoExec;
if (StringRef("nognustack") == arg->getValue())
return GnuStackKind::None;
}
return GnuStackKind::NoExec;
}
static uint8_t getZStartStopVisibility(opt::InputArgList &args) {
for (auto *arg : args.filtered_reverse(OPT_z)) {
std::pair<StringRef, StringRef> kv = StringRef(arg->getValue()).split('=');
if (kv.first == "start-stop-visibility") {
if (kv.second == "default")
return STV_DEFAULT;
else if (kv.second == "internal")
return STV_INTERNAL;
else if (kv.second == "hidden")
return STV_HIDDEN;
else if (kv.second == "protected")
return STV_PROTECTED;
error("unknown -z start-stop-visibility= value: " + StringRef(kv.second));
}
}
return STV_PROTECTED;
}
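// For example, "-z start-stop-visibility=hidden" returns STV_HIDDEN;
// when the option is absent, the __start_*/__stop_* section symbols get
// the STV_PROTECTED default.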
constexpr const char *knownZFlags[] = {
"combreloc",
"copyreloc",
"defs",
"execstack",
"force-bti",
"force-ibt",
"global",
"hazardplt",
"ifunc-noplt",
"initfirst",
"interpose",
"keep-text-section-prefix",
"lazy",
"muldefs",
"nocombreloc",
"nocopyreloc",
"nodefaultlib",
"nodelete",
"nodlopen",
"noexecstack",
"nognustack",
"nokeep-text-section-prefix",
"nopack-relative-relocs",
"norelro",
"noseparate-code",
"nostart-stop-gc",
"notext",
"now",
"origin",
"pac-plt",
"pack-relative-relocs",
"rel",
"rela",
"relro",
"retpolineplt",
"rodynamic",
"separate-code",
"separate-loadable-segments",
"shstk",
"start-stop-gc",
"text",
"undefs",
"wxneeded",
};
static bool isKnownZFlag(StringRef s) {
return llvm::is_contained(knownZFlags, s) ||
s.startswith("common-page-size=") || s.startswith("bti-report=") ||
s.startswith("cet-report=") ||
s.startswith("dead-reloc-in-nonalloc=") ||
s.startswith("max-page-size=") || s.startswith("stack-size=") ||
s.startswith("start-stop-visibility=");
}
// Report a warning for an unknown -z option.
static void checkZOptions(opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_z))
if (!isKnownZFlag(arg->getValue()))
warn("unknown -z value: " + StringRef(arg->getValue()));
}
constexpr const char *saveTempsValues[] = {
"resolution", "preopt", "promote", "internalize", "import",
"opt", "precodegen", "prelink", "combinedindex"};
void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
ELFOptTable parser;
opt::InputArgList args = parser.parse(argsArr.slice(1));
// Interpret these flags early because error()/warn() depend on them.
errorHandler().errorLimit = args::getInteger(args, OPT_error_limit, 20);
errorHandler().fatalWarnings =
args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false);
checkZOptions(args);
// Handle -help
if (args.hasArg(OPT_help)) {
printHelp();
return;
}
// Handle -v or -version.
//
// A note about "compatible with GNU linkers" message: this is a hack for
// scripts generated by GNU Libtool up to 2021-10 to recognize LLD as
// a GNU compatible linker. See
// <https://lists.gnu.org/archive/html/libtool/2017-01/msg00007.html>.
//
// This is a somewhat ugly hack, but in reality, we had no choice other
// than doing this. Considering the very long release cycle of Libtool,
// it is not easy to improve it to recognize LLD as a GNU-compatible
// linker in a timely manner. Even if we could, there are still a lot
// of "configure" scripts out there that were generated by old versions
// of Libtool. We cannot convince every software developer to migrate to
// the latest version and regenerate their scripts. So we have this hack.
if (args.hasArg(OPT_v) || args.hasArg(OPT_version))
message(getLLDVersion() + " (compatible with GNU linkers)");
if (const char *path = getReproduceOption(args)) {
// Note that --reproduce is a debug option so you can ignore it
// if you are trying to understand the whole picture of the code.
Expected<std::unique_ptr<TarWriter>> errOrWriter =
TarWriter::create(path, path::stem(path));
if (errOrWriter) {
tar = std::move(*errOrWriter);
tar->append("response.txt", createResponseFile(args));
tar->append("version.txt", getLLDVersion() + "\n");
StringRef ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
if (!ltoSampleProfile.empty())
readFile(ltoSampleProfile);
} else {
error("--reproduce: " + toString(errOrWriter.takeError()));
}
}
readConfigs(args);
// The behavior of -v or --version is a bit strange, but this is
// needed for compatibility with GNU linkers.
if (args.hasArg(OPT_v) && !args.hasArg(OPT_INPUT))
return;
if (args.hasArg(OPT_version))
return;
// Initialize time trace profiler.
if (config->timeTraceEnabled)
timeTraceProfilerInitialize(config->timeTraceGranularity, config->progName);
{
llvm::TimeTraceScope timeScope("ExecuteLinker");
initLLVM();
createFiles(args);
if (errorCount())
return;
inferMachineType();
setConfigs(args);
checkOptions();
if (errorCount())
return;
link(args);
}
if (config->timeTraceEnabled) {
checkError(timeTraceProfilerWrite(
args.getLastArgValue(OPT_time_trace_eq).str(), config->outputFile));
timeTraceProfilerCleanup();
}
}
static std::string getRpath(opt::InputArgList &args) {
std::vector<StringRef> v = args::getStrings(args, OPT_rpath);
return llvm::join(v.begin(), v.end(), ":");
}
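// For example, "-rpath /usr/local/lib -rpath /opt/lib" collapses into
// the single search-path string "/usr/local/lib:/opt/lib".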
// Determines what we should do if there are remaining unresolved
// symbols after name resolution.
static void setUnresolvedSymbolPolicy(opt::InputArgList &args) {
UnresolvedPolicy errorOrWarn = args.hasFlag(OPT_error_unresolved_symbols,
OPT_warn_unresolved_symbols, true)
? UnresolvedPolicy::ReportError
: UnresolvedPolicy::Warn;
// -shared implies --unresolved-symbols=ignore-all because missing
// symbols are likely to be resolved at runtime.
bool diagRegular = !config->shared, diagShlib = !config->shared;
for (const opt::Arg *arg : args) {
switch (arg->getOption().getID()) {
case OPT_unresolved_symbols: {
StringRef s = arg->getValue();
if (s == "ignore-all") {
diagRegular = false;
diagShlib = false;
} else if (s == "ignore-in-object-files") {
diagRegular = false;
diagShlib = true;
} else if (s == "ignore-in-shared-libs") {
diagRegular = true;
diagShlib = false;
} else if (s == "report-all") {
diagRegular = true;
diagShlib = true;
} else {
error("unknown --unresolved-symbols value: " + s);
}
break;
}
case OPT_no_undefined:
diagRegular = true;
break;
case OPT_z:
if (StringRef(arg->getValue()) == "defs")
diagRegular = true;
else if (StringRef(arg->getValue()) == "undefs")
diagRegular = false;
break;
case OPT_allow_shlib_undefined:
diagShlib = false;
break;
case OPT_no_allow_shlib_undefined:
diagShlib = true;
break;
}
}
config->unresolvedSymbols =
diagRegular ? errorOrWarn : UnresolvedPolicy::Ignore;
config->unresolvedSymbolsInShlib =
diagShlib ? errorOrWarn : UnresolvedPolicy::Ignore;
}
static Target2Policy getTarget2(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_target2, "got-rel");
if (s == "rel")
return Target2Policy::Rel;
if (s == "abs")
return Target2Policy::Abs;
if (s == "got-rel")
return Target2Policy::GotRel;
error("unknown --target2 option: " + s);
return Target2Policy::GotRel;
}
static bool isOutputFormatBinary(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_oformat, "elf");
if (s == "binary")
return true;
if (!s.startswith("elf"))
error("unknown --oformat value: " + s);
return false;
}
static DiscardPolicy getDiscard(opt::InputArgList &args) {
auto *arg =
args.getLastArg(OPT_discard_all, OPT_discard_locals, OPT_discard_none);
if (!arg)
return DiscardPolicy::Default;
if (arg->getOption().getID() == OPT_discard_all)
return DiscardPolicy::All;
if (arg->getOption().getID() == OPT_discard_locals)
return DiscardPolicy::Locals;
return DiscardPolicy::None;
}
static StringRef getDynamicLinker(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_dynamic_linker, OPT_no_dynamic_linker);
if (!arg)
return "";
if (arg->getOption().getID() == OPT_no_dynamic_linker) {
// --no-dynamic-linker suppresses undefined weak symbols in .dynsym
config->noDynamicLinker = true;
return "";
}
return arg->getValue();
}
static int getMemtagMode(opt::InputArgList &args) {
StringRef memtagModeArg = args.getLastArgValue(OPT_android_memtag_mode);
if (!config->androidMemtagHeap && !config->androidMemtagStack) {
if (!memtagModeArg.empty())
error("when using --android-memtag-mode, at least one of "
"--android-memtag-heap or "
"--android-memtag-stack is required");
return ELF::NT_MEMTAG_LEVEL_NONE;
}
if (memtagModeArg == "sync" || memtagModeArg.empty())
return ELF::NT_MEMTAG_LEVEL_SYNC;
if (memtagModeArg == "async")
return ELF::NT_MEMTAG_LEVEL_ASYNC;
if (memtagModeArg == "none")
return ELF::NT_MEMTAG_LEVEL_NONE;
error("unknown --android-memtag-mode value: \"" + memtagModeArg +
"\", should be one of {async, sync, none}");
return ELF::NT_MEMTAG_LEVEL_NONE;
}
static ICFLevel getICF(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_icf_none, OPT_icf_safe, OPT_icf_all);
if (!arg || arg->getOption().getID() == OPT_icf_none)
return ICFLevel::None;
if (arg->getOption().getID() == OPT_icf_safe)
return ICFLevel::Safe;
return ICFLevel::All;
}
static StripPolicy getStrip(opt::InputArgList &args) {
if (args.hasArg(OPT_relocatable))
return StripPolicy::None;
auto *arg = args.getLastArg(OPT_strip_all, OPT_strip_debug);
if (!arg)
return StripPolicy::None;
if (arg->getOption().getID() == OPT_strip_all)
return StripPolicy::All;
return StripPolicy::Debug;
}
static uint64_t parseSectionAddress(StringRef s, opt::InputArgList &args,
const opt::Arg &arg) {
uint64_t va = 0;
if (s.startswith("0x"))
s = s.drop_front(2);
if (!to_integer(s, va, 16))
error("invalid argument: " + arg.getAsString(args));
return va;
}
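// Parses --section-start and the -Ttext/-Tdata/-Tbss shorthands into a map
// from section name to virtual address. For example (addresses are
// illustrative), --section-start=.rodata=0x200000 places .rodata at
// 0x200000, and -Ttext=0x100000 has the same effect as
// --section-start=.text=0x100000. Addresses are parsed as hexadecimal,
// with or without a leading "0x".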
static StringMap<uint64_t> getSectionStartMap(opt::InputArgList &args) {
StringMap<uint64_t> ret;
for (auto *arg : args.filtered(OPT_section_start)) {
StringRef name;
StringRef addr;
std::tie(name, addr) = StringRef(arg->getValue()).split('=');
ret[name] = parseSectionAddress(addr, args, *arg);
}
if (auto *arg = args.getLastArg(OPT_Ttext))
ret[".text"] = parseSectionAddress(arg->getValue(), args, *arg);
if (auto *arg = args.getLastArg(OPT_Tdata))
ret[".data"] = parseSectionAddress(arg->getValue(), args, *arg);
if (auto *arg = args.getLastArg(OPT_Tbss))
ret[".bss"] = parseSectionAddress(arg->getValue(), args, *arg);
return ret;
}
static SortSectionPolicy getSortSection(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_sort_section);
if (s == "alignment")
return SortSectionPolicy::Alignment;
if (s == "name")
return SortSectionPolicy::Name;
if (!s.empty())
error("unknown --sort-section rule: " + s);
return SortSectionPolicy::Default;
}
static OrphanHandlingPolicy getOrphanHandling(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_orphan_handling, "place");
if (s == "warn")
return OrphanHandlingPolicy::Warn;
if (s == "error")
return OrphanHandlingPolicy::Error;
if (s != "place")
error("unknown --orphan-handling mode: " + s);
return OrphanHandlingPolicy::Place;
}
// Parse --build-id or --build-id=<style>. We handle "tree" as a
// synonym for "sha1" because all our hash functions including
// --build-id=sha1 are actually tree hashes for performance reasons.
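// For example, --build-id=sha1 computes a (tree-hashed) SHA-1 build ID,
// and --build-id=0x12345678 (an illustrative hex string) embeds the given
// bytes verbatim.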
static std::pair<BuildIdKind, std::vector<uint8_t>>
getBuildId(opt::InputArgList &args) {
auto *arg = args.getLastArg(OPT_build_id);
if (!arg)
return {BuildIdKind::None, {}};
StringRef s = arg->getValue();
if (s == "fast")
return {BuildIdKind::Fast, {}};
if (s == "md5")
return {BuildIdKind::Md5, {}};
if (s == "sha1" || s == "tree")
return {BuildIdKind::Sha1, {}};
if (s == "uuid")
return {BuildIdKind::Uuid, {}};
if (s.startswith("0x"))
return {BuildIdKind::Hexstring, parseHex(s.substr(2))};
if (s != "none")
error("unknown --build-id style: " + s);
return {BuildIdKind::None, {}};
}
static std::pair<bool, bool> getPackDynRelocs(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_pack_dyn_relocs, "none");
if (s == "android")
return {true, false};
if (s == "relr")
return {false, true};
if (s == "android+relr")
return {true, true};
if (s != "none")
error("unknown --pack-dyn-relocs format: " + s);
return {false, false};
}
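// Reads a call graph profile file (--call-graph-ordering-file). Each
// non-empty line must contain exactly three space-separated fields:
//
//   <from-symbol> <to-symbol> <count>
//
// For example (symbol names are illustrative):
//
//   main foo 100
//   foo bar 32
//
// Counts for repeated (from, to) section pairs accumulate.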
static void readCallGraph(MemoryBufferRef mb) {
// Build a map from symbol name to symbol
DenseMap<StringRef, Symbol *> map;
for (ELFFileBase *file : ctx->objectFiles)
for (Symbol *sym : file->getSymbols())
map[sym->getName()] = sym;
auto findSection = [&](StringRef name) -> InputSectionBase * {
Symbol *sym = map.lookup(name);
if (!sym) {
if (config->warnSymbolOrdering)
warn(mb.getBufferIdentifier() + ": no such symbol: " + name);
return nullptr;
}
maybeWarnUnorderableSymbol(sym);
if (Defined *dr = dyn_cast_or_null<Defined>(sym))
return dyn_cast_or_null<InputSectionBase>(dr->section);
return nullptr;
};
for (StringRef line : args::getLines(mb)) {
SmallVector<StringRef, 3> fields;
line.split(fields, ' ');
uint64_t count;
if (fields.size() != 3 || !to_integer(fields[2], count)) {
error(mb.getBufferIdentifier() + ": parse error");
return;
}
if (InputSectionBase *from = findSection(fields[0]))
if (InputSectionBase *to = findSection(fields[1]))
config->callGraphProfile[std::make_pair(from, to)] += count;
}
}
// If SHT_LLVM_CALL_GRAPH_PROFILE and its relocation section exist, returns
// true and populates cgProfile and symbolIndices.
template <class ELFT>
static bool
processCallGraphRelocations(SmallVector<uint32_t, 32> &symbolIndices,
ArrayRef<typename ELFT::CGProfile> &cgProfile,
ObjFile<ELFT> *inputObj) {
if (inputObj->cgProfileSectionIndex == SHN_UNDEF)
return false;
ArrayRef<Elf_Shdr_Impl<ELFT>> objSections =
inputObj->template getELFShdrs<ELFT>();
symbolIndices.clear();
const ELFFile<ELFT> &obj = inputObj->getObj();
cgProfile =
check(obj.template getSectionContentsAsArray<typename ELFT::CGProfile>(
objSections[inputObj->cgProfileSectionIndex]));
for (size_t i = 0, e = objSections.size(); i < e; ++i) {
const Elf_Shdr_Impl<ELFT> &sec = objSections[i];
if (sec.sh_info == inputObj->cgProfileSectionIndex) {
if (sec.sh_type == SHT_RELA) {
ArrayRef<typename ELFT::Rela> relas =
CHECK(obj.relas(sec), "could not retrieve cg profile rela section");
for (const typename ELFT::Rela &rel : relas)
symbolIndices.push_back(rel.getSymbol(config->isMips64EL));
break;
}
if (sec.sh_type == SHT_REL) {
ArrayRef<typename ELFT::Rel> rels =
CHECK(obj.rels(sec), "could not retrieve cg profile rel section");
for (const typename ELFT::Rel &rel : rels)
symbolIndices.push_back(rel.getSymbol(config->isMips64EL));
break;
}
}
}
if (symbolIndices.empty())
warn("SHT_LLVM_CALL_GRAPH_PROFILE exists, but relocation section doesn't");
return !symbolIndices.empty();
}
template <class ELFT> static void readCallGraphsFromObjectFiles() {
SmallVector<uint32_t, 32> symbolIndices;
ArrayRef<typename ELFT::CGProfile> cgProfile;
for (auto file : ctx->objectFiles) {
auto *obj = cast<ObjFile<ELFT>>(file);
if (!processCallGraphRelocations(symbolIndices, cgProfile, obj))
continue;
if (symbolIndices.size() != cgProfile.size() * 2)
fatal("number of relocations doesn't match Weights");
for (uint32_t i = 0, size = cgProfile.size(); i < size; ++i) {
const Elf_CGProfile_Impl<ELFT> &cgpe = cgProfile[i];
uint32_t fromIndex = symbolIndices[i * 2];
uint32_t toIndex = symbolIndices[i * 2 + 1];
auto *fromSym = dyn_cast<Defined>(&obj->getSymbol(fromIndex));
auto *toSym = dyn_cast<Defined>(&obj->getSymbol(toIndex));
if (!fromSym || !toSym)
continue;
auto *from = dyn_cast_or_null<InputSectionBase>(fromSym->section);
auto *to = dyn_cast_or_null<InputSectionBase>(toSym->section);
if (from && to)
config->callGraphProfile[{from, to}] += cgpe.cgp_weight;
}
}
}
static bool getCompressDebugSections(opt::InputArgList &args) {
StringRef s = args.getLastArgValue(OPT_compress_debug_sections, "none");
if (s == "none")
return false;
if (s != "zlib")
error("unknown --compress-debug-sections value: " + s);
if (!compression::zlib::isAvailable())
error("--compress-debug-sections: zlib is not available");
return true;
}
static StringRef getAliasSpelling(opt::Arg *arg) {
if (const opt::Arg *alias = arg->getAlias())
return alias->getSpelling();
return arg->getSpelling();
}
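// Parses an option whose value must have the form "<old>;<new>", e.g.
// --thinlto-prefix-replace='old/;new/' (the paths are illustrative).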
static std::pair<StringRef, StringRef> getOldNewOptions(opt::InputArgList &args,
unsigned id) {
auto *arg = args.getLastArg(id);
if (!arg)
return {"", ""};
StringRef s = arg->getValue();
std::pair<StringRef, StringRef> ret = s.split(';');
if (ret.second.empty())
error(getAliasSpelling(arg) + " expects 'old;new' format, but got " + s);
return ret;
}
// Parse the symbol ordering file and warn for any duplicate entries.
static std::vector<StringRef> getSymbolOrderingFile(MemoryBufferRef mb) {
SetVector<StringRef> names;
for (StringRef s : args::getLines(mb))
if (!names.insert(s) && config->warnSymbolOrdering)
warn(mb.getBufferIdentifier() + ": duplicate ordered symbol: " + s);
return names.takeVector();
}
static bool getIsRela(opt::InputArgList &args) {
// If -z rel or -z rela is specified, use the last option.
for (auto *arg : args.filtered_reverse(OPT_z)) {
StringRef s(arg->getValue());
if (s == "rel")
return false;
if (s == "rela")
return true;
}
// Otherwise, use the psABI-defined relocation entry format.
uint16_t m = config->emachine;
return m == EM_AARCH64 || m == EM_AMDGPU || m == EM_HEXAGON || m == EM_PPC ||
m == EM_PPC64 || m == EM_RISCV || m == EM_X86_64;
}
static void parseClangOption(StringRef opt, const Twine &msg) {
std::string err;
raw_string_ostream os(err);
const char *argv[] = {config->progName.data(), opt.data()};
if (cl::ParseCommandLineOptions(2, argv, "", &os))
return;
os.flush();
error(msg + ": " + StringRef(err).trim());
}
// Checks the parameter of the bti-report and cet-report options.
static bool isValidReportString(StringRef arg) {
return arg == "none" || arg == "warning" || arg == "error";
}
// Initializes Config members from the command line options.
static void readConfigs(opt::InputArgList &args) {
errorHandler().verbose = args.hasArg(OPT_verbose);
errorHandler().vsDiagnostics =
args.hasArg(OPT_visual_studio_diagnostics_format, false);
config->allowMultipleDefinition =
args.hasFlag(OPT_allow_multiple_definition,
OPT_no_allow_multiple_definition, false) ||
hasZOption(args, "muldefs");
config->androidMemtagHeap =
args.hasFlag(OPT_android_memtag_heap, OPT_no_android_memtag_heap, false);
config->androidMemtagStack = args.hasFlag(OPT_android_memtag_stack,
OPT_no_android_memtag_stack, false);
config->androidMemtagMode = getMemtagMode(args);
config->auxiliaryList = args::getStrings(args, OPT_auxiliary);
if (opt::Arg *arg =
args.getLastArg(OPT_Bno_symbolic, OPT_Bsymbolic_non_weak_functions,
OPT_Bsymbolic_functions, OPT_Bsymbolic)) {
if (arg->getOption().matches(OPT_Bsymbolic_non_weak_functions))
config->bsymbolic = BsymbolicKind::NonWeakFunctions;
else if (arg->getOption().matches(OPT_Bsymbolic_functions))
config->bsymbolic = BsymbolicKind::Functions;
else if (arg->getOption().matches(OPT_Bsymbolic))
config->bsymbolic = BsymbolicKind::All;
}
config->checkSections =
args.hasFlag(OPT_check_sections, OPT_no_check_sections, true);
config->chroot = args.getLastArgValue(OPT_chroot);
config->compressDebugSections = getCompressDebugSections(args);
config->cref = args.hasArg(OPT_cref);
config->optimizeBBJumps =
args.hasFlag(OPT_optimize_bb_jumps, OPT_no_optimize_bb_jumps, false);
config->demangle = args.hasFlag(OPT_demangle, OPT_no_demangle, true);
config->dependencyFile = args.getLastArgValue(OPT_dependency_file);
config->dependentLibraries = args.hasFlag(OPT_dependent_libraries, OPT_no_dependent_libraries, true);
config->disableVerify = args.hasArg(OPT_disable_verify);
config->discard = getDiscard(args);
config->dwoDir = args.getLastArgValue(OPT_plugin_opt_dwo_dir_eq);
config->dynamicLinker = getDynamicLinker(args);
config->ehFrameHdr =
args.hasFlag(OPT_eh_frame_hdr, OPT_no_eh_frame_hdr, false);
config->emitLLVM = args.hasArg(OPT_plugin_opt_emit_llvm, false);
config->emitRelocs = args.hasArg(OPT_emit_relocs);
config->callGraphProfileSort = args.hasFlag(
OPT_call_graph_profile_sort, OPT_no_call_graph_profile_sort, true);
config->enableNewDtags =
args.hasFlag(OPT_enable_new_dtags, OPT_disable_new_dtags, true);
config->entry = args.getLastArgValue(OPT_entry);
errorHandler().errorHandlingScript =
args.getLastArgValue(OPT_error_handling_script);
config->executeOnly =
args.hasFlag(OPT_execute_only, OPT_no_execute_only, false);
config->exportDynamic =
args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, false) ||
args.hasArg(OPT_shared);
config->filterList = args::getStrings(args, OPT_filter);
config->fini = args.getLastArgValue(OPT_fini, "_fini");
config->fixCortexA53Errata843419 = args.hasArg(OPT_fix_cortex_a53_843419) &&
!args.hasArg(OPT_relocatable);
config->fixCortexA8 =
args.hasArg(OPT_fix_cortex_a8) && !args.hasArg(OPT_relocatable);
config->fortranCommon =
args.hasFlag(OPT_fortran_common, OPT_no_fortran_common, false);
config->gcSections = args.hasFlag(OPT_gc_sections, OPT_no_gc_sections, false);
config->gnuUnique = args.hasFlag(OPT_gnu_unique, OPT_no_gnu_unique, true);
config->gdbIndex = args.hasFlag(OPT_gdb_index, OPT_no_gdb_index, false);
config->icf = getICF(args);
config->ignoreDataAddressEquality =
args.hasArg(OPT_ignore_data_address_equality);
config->ignoreFunctionAddressEquality =
args.hasArg(OPT_ignore_function_address_equality);
config->init = args.getLastArgValue(OPT_init, "_init");
config->ltoAAPipeline = args.getLastArgValue(OPT_lto_aa_pipeline);
config->ltoCSProfileGenerate = args.hasArg(OPT_lto_cs_profile_generate);
config->ltoCSProfileFile = args.getLastArgValue(OPT_lto_cs_profile_file);
config->ltoPGOWarnMismatch = args.hasFlag(OPT_lto_pgo_warn_mismatch,
OPT_no_lto_pgo_warn_mismatch, true);
config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager);
config->ltoEmitAsm = args.hasArg(OPT_lto_emit_asm);
config->ltoNewPmPasses = args.getLastArgValue(OPT_lto_newpm_passes);
config->ltoWholeProgramVisibility =
args.hasFlag(OPT_lto_whole_program_visibility,
OPT_no_lto_whole_program_visibility, false);
config->ltoo = args::getInteger(args, OPT_lto_O, 2);
config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq);
config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1);
config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile);
config->ltoBasicBlockSections =
args.getLastArgValue(OPT_lto_basic_block_sections);
config->ltoUniqueBasicBlockSectionNames =
args.hasFlag(OPT_lto_unique_basic_block_section_names,
OPT_no_lto_unique_basic_block_section_names, false);
config->mapFile = args.getLastArgValue(OPT_Map);
config->mipsGotSize = args::getInteger(args, OPT_mips_got_size, 0xfff0);
config->mergeArmExidx =
args.hasFlag(OPT_merge_exidx_entries, OPT_no_merge_exidx_entries, true);
config->mmapOutputFile =
args.hasFlag(OPT_mmap_output_file, OPT_no_mmap_output_file, true);
config->nmagic = args.hasFlag(OPT_nmagic, OPT_no_nmagic, false);
config->noinhibitExec = args.hasArg(OPT_noinhibit_exec);
config->nostdlib = args.hasArg(OPT_nostdlib);
config->oFormatBinary = isOutputFormatBinary(args);
config->omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false);
config->opaquePointers = args.hasFlag(
OPT_plugin_opt_opaque_pointers, OPT_plugin_opt_no_opaque_pointers, true);
config->optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename);
config->optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file);
// Parse the remarks hotness threshold. A valid value is an integer or 'auto'.
if (auto *arg = args.getLastArg(OPT_opt_remarks_hotness_threshold)) {
auto resultOrErr = remarks::parseHotnessThresholdOption(arg->getValue());
if (!resultOrErr)
error(arg->getSpelling() + ": invalid argument '" + arg->getValue() +
"', only integer or 'auto' is supported");
else
config->optRemarksHotnessThreshold = *resultOrErr;
}
config->optRemarksPasses = args.getLastArgValue(OPT_opt_remarks_passes);
config->optRemarksWithHotness = args.hasArg(OPT_opt_remarks_with_hotness);
config->optRemarksFormat = args.getLastArgValue(OPT_opt_remarks_format);
config->optimize = args::getInteger(args, OPT_O, 1);
config->orphanHandling = getOrphanHandling(args);
config->outputFile = args.getLastArgValue(OPT_o);
config->packageMetadata = args.getLastArgValue(OPT_package_metadata);
config->pie = args.hasFlag(OPT_pie, OPT_no_pie, false);
config->printIcfSections =
args.hasFlag(OPT_print_icf_sections, OPT_no_print_icf_sections, false);
config->printGcSections =
args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
config->printArchiveStats = args.getLastArgValue(OPT_print_archive_stats);
config->printSymbolOrder =
args.getLastArgValue(OPT_print_symbol_order);
config->relax = args.hasFlag(OPT_relax, OPT_no_relax, true);
config->rpath = getRpath(args);
config->relocatable = args.hasArg(OPT_relocatable);
if (args.hasArg(OPT_save_temps)) {
// --save-temps implies saving all temps.
for (const char *s : saveTempsValues)
config->saveTempsArgs.insert(s);
} else {
for (auto *arg : args.filtered(OPT_save_temps_eq)) {
StringRef s = arg->getValue();
if (llvm::is_contained(saveTempsValues, s))
config->saveTempsArgs.insert(s);
else
error("unknown --save-temps value: " + s);
}
}
config->searchPaths = args::getStrings(args, OPT_library_path);
config->sectionStartMap = getSectionStartMap(args);
config->shared = args.hasArg(OPT_shared);
config->singleRoRx = !args.hasFlag(OPT_rosegment, OPT_no_rosegment, true);
config->soName = args.getLastArgValue(OPT_soname);
config->sortSection = getSortSection(args);
config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384);
config->strip = getStrip(args);
config->sysroot = args.getLastArgValue(OPT_sysroot);
config->target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
config->target2 = getTarget2(args);
config->thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir);
config->thinLTOCachePolicy = CHECK(
parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)),
"--thinlto-cache-policy: invalid cache policy");
config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files);
config->thinLTOEmitIndexFiles = args.hasArg(OPT_thinlto_emit_index_files) ||
args.hasArg(OPT_thinlto_index_only) ||
args.hasArg(OPT_thinlto_index_only_eq);
config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) ||
args.hasArg(OPT_thinlto_index_only_eq);
config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq);
config->thinLTOObjectSuffixReplace =
getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq);
config->thinLTOPrefixReplace =
getOldNewOptions(args, OPT_thinlto_prefix_replace_eq);
if (config->thinLTOEmitIndexFiles && !config->thinLTOIndexOnly) {
if (args.hasArg(OPT_thinlto_object_suffix_replace_eq))
error("--thinlto-object-suffix-replace is not supported with "
"--thinlto-emit-index-files");
else if (args.hasArg(OPT_thinlto_prefix_replace_eq))
error("--thinlto-prefix-replace is not supported with "
"--thinlto-emit-index-files");
}
config->thinLTOModulesToCompile =
args::getStrings(args, OPT_thinlto_single_module_eq);
config->timeTraceEnabled = args.hasArg(OPT_time_trace_eq);
config->timeTraceGranularity =
args::getInteger(args, OPT_time_trace_granularity, 500);
config->trace = args.hasArg(OPT_trace);
config->undefined = args::getStrings(args, OPT_undefined);
config->undefinedVersion =
args.hasFlag(OPT_undefined_version, OPT_no_undefined_version, true);
config->unique = args.hasArg(OPT_unique);
config->useAndroidRelrTags = args.hasFlag(
OPT_use_android_relr_tags, OPT_no_use_android_relr_tags, false);
config->warnBackrefs =
args.hasFlag(OPT_warn_backrefs, OPT_no_warn_backrefs, false);
config->warnCommon = args.hasFlag(OPT_warn_common, OPT_no_warn_common, false);
config->warnSymbolOrdering =
args.hasFlag(OPT_warn_symbol_ordering, OPT_no_warn_symbol_ordering, true);
config->whyExtract = args.getLastArgValue(OPT_why_extract);
config->zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
config->zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
config->zForceBti = hasZOption(args, "force-bti");
config->zForceIbt = hasZOption(args, "force-ibt");
config->zGlobal = hasZOption(args, "global");
config->zGnustack = getZGnuStack(args);
config->zHazardplt = hasZOption(args, "hazardplt");
config->zIfuncNoplt = hasZOption(args, "ifunc-noplt");
config->zInitfirst = hasZOption(args, "initfirst");
config->zInterpose = hasZOption(args, "interpose");
config->zKeepTextSectionPrefix = getZFlag(
args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
config->zNodefaultlib = hasZOption(args, "nodefaultlib");
config->zNodelete = hasZOption(args, "nodelete");
config->zNodlopen = hasZOption(args, "nodlopen");
config->zNow = getZFlag(args, "now", "lazy", false);
config->zOrigin = hasZOption(args, "origin");
config->zPacPlt = hasZOption(args, "pac-plt");
config->zRelro = getZFlag(args, "relro", "norelro", true);
config->zRetpolineplt = hasZOption(args, "retpolineplt");
config->zRodynamic = hasZOption(args, "rodynamic");
config->zSeparate = getZSeparate(args);
config->zShstk = hasZOption(args, "shstk");
config->zStackSize = args::getZOptionValue(args, OPT_z, "stack-size", 0);
config->zStartStopGC =
getZFlag(args, "start-stop-gc", "nostart-stop-gc", true);
config->zStartStopVisibility = getZStartStopVisibility(args);
config->zText = getZFlag(args, "text", "notext", true);
config->zWxneeded = hasZOption(args, "wxneeded");
setUnresolvedSymbolPolicy(args);
config->power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
if (arg->getOption().matches(OPT_eb))
config->optEB = true;
else
config->optEL = true;
}
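// Parse --shuffle-sections=<section-glob>=<seed>. For example (the glob is
// illustrative), --shuffle-sections='.text.*=42' shuffles sections matching
// the glob using seed 42, and a seed of -1 reverses their order.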
for (opt::Arg *arg : args.filtered(OPT_shuffle_sections)) {
constexpr StringRef errPrefix = "--shuffle-sections=: ";
std::pair<StringRef, StringRef> kv = StringRef(arg->getValue()).split('=');
if (kv.first.empty() || kv.second.empty()) {
error(errPrefix + "expected <section_glob>=<seed>, but got '" +
arg->getValue() + "'");
continue;
}
// Signed so that <section_glob>=-1 is allowed.
int64_t v;
if (!to_integer(kv.second, v))
error(errPrefix + "expected an integer, but got '" + kv.second + "'");
else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first))
config->shuffleSections.emplace_back(std::move(*pat), uint32_t(v));
else
error(errPrefix + toString(pat.takeError()));
}
auto reports = {std::make_pair("bti-report", &config->zBtiReport),
std::make_pair("cet-report", &config->zCetReport)};
for (opt::Arg *arg : args.filtered(OPT_z)) {
std::pair<StringRef, StringRef> option =
StringRef(arg->getValue()).split('=');
for (auto reportArg : reports) {
if (option.first != reportArg.first)
continue;
if (!isValidReportString(option.second)) {
error(Twine("-z ") + reportArg.first + "= parameter " + option.second +
" is not recognized");
continue;
}
*reportArg.second = option.second;
}
}
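// Parse -z dead-reloc-in-nonalloc=<section-glob>=<value>. For example (the
// glob and value are illustrative), -z dead-reloc-in-nonalloc=.debug_loc=1
// resolves relocations referencing discarded symbols in matching
// non-SHF_ALLOC sections to the value 1.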
for (opt::Arg *arg : args.filtered(OPT_z)) {
std::pair<StringRef, StringRef> option =
StringRef(arg->getValue()).split('=');
if (option.first != "dead-reloc-in-nonalloc")
continue;
constexpr StringRef errPrefix = "-z dead-reloc-in-nonalloc=: ";
std::pair<StringRef, StringRef> kv = option.second.split('=');
if (kv.first.empty() || kv.second.empty()) {
error(errPrefix + "expected <section_glob>=<value>");
continue;
}
uint64_t v;
if (!to_integer(kv.second, v))
error(errPrefix + "expected a non-negative integer, but got '" +
kv.second + "'");
else if (Expected<GlobPattern> pat = GlobPattern::create(kv.first))
config->deadRelocInNonAlloc.emplace_back(std::move(*pat), v);
else
error(errPrefix + toString(pat.takeError()));
}
cl::ResetAllOptionOccurrences();
// Parse LTO options.
if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq))
parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())),
arg->getSpelling());
for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus))
parseClangOption(std::string("-") + arg->getValue(), arg->getSpelling());
// GCC collect2 passes -plugin-opt=path/to/lto-wrapper with an absolute or
- // relative path. Just ignore. If not ended with "lto-wrapper", consider it an
+ // relative path. Just ignore. If not ended with "lto-wrapper" (or
+ // "lto-wrapper.exe" for GCC cross-compiled for Windows), consider it an
// unsupported LLVMgold.so option and error.
- for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq))
- if (!StringRef(arg->getValue()).endswith("lto-wrapper"))
+ for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq)) {
+ StringRef v(arg->getValue());
+ if (!v.endswith("lto-wrapper") && !v.endswith("lto-wrapper.exe"))
error(arg->getSpelling() + ": unknown plugin option '" + arg->getValue() +
"'");
+ }
config->passPlugins = args::getStrings(args, OPT_load_pass_plugins);
// Parse -mllvm options.
for (auto *arg : args.filtered(OPT_mllvm))
parseClangOption(arg->getValue(), arg->getSpelling());
// --threads= takes a positive integer and provides the default value for
// --thinlto-jobs=.
if (auto *arg = args.getLastArg(OPT_threads)) {
StringRef v(arg->getValue());
unsigned threads = 0;
if (!llvm::to_integer(v, threads, 0) || threads == 0)
error(arg->getSpelling() + ": expected a positive integer, but got '" +
arg->getValue() + "'");
parallel::strategy = hardware_concurrency(threads);
config->thinLTOJobs = v;
}
if (auto *arg = args.getLastArg(OPT_thinlto_jobs))
config->thinLTOJobs = arg->getValue();
if (config->ltoo > 3)
error("invalid optimization level for LTO: " + Twine(config->ltoo));
if (config->ltoPartitions == 0)
error("--lto-partitions: number of threads must be > 0");
if (!get_threadpool_strategy(config->thinLTOJobs))
error("--thinlto-jobs: invalid job count: " + config->thinLTOJobs);
if (config->splitStackAdjustSize < 0)
error("--split-stack-adjust-size: size must be >= 0");
// The text segment is traditionally the first segment, whose address equals
// the base address. However, lld places the R PT_LOAD first. -Ttext-segment
// is an old-fashioned option that does not play well with lld's layout.
// Suggest --image-base as a likely alternative.
if (args.hasArg(OPT_Ttext_segment))
error("-Ttext-segment is not supported. Use --image-base if you "
"intend to set the base address");
// Parse ELF{32,64}{LE,BE} and CPU type.
if (auto *arg = args.getLastArg(OPT_m)) {
StringRef s = arg->getValue();
std::tie(config->ekind, config->emachine, config->osabi) =
parseEmulation(s);
config->mipsN32Abi =
(s.startswith("elf32btsmipn32") || s.startswith("elf32ltsmipn32"));
config->emulation = s;
}
// Parse --hash-style={sysv,gnu,both}.
if (auto *arg = args.getLastArg(OPT_hash_style)) {
StringRef s = arg->getValue();
if (s == "sysv")
config->sysvHash = true;
else if (s == "gnu")
config->gnuHash = true;
else if (s == "both")
config->sysvHash = config->gnuHash = true;
else
error("unknown --hash-style: " + s);
}
if (args.hasArg(OPT_print_map))
config->mapFile = "-";
// Page alignment can be disabled by the -n (--nmagic) and -N (--omagic)
// options. As PT_GNU_RELRO relies on paging, do not create it when paging
// has been disabled.
if (config->nmagic || config->omagic)
config->zRelro = false;
std::tie(config->buildId, config->buildIdVector) = getBuildId(args);
if (getZFlag(args, "pack-relative-relocs", "nopack-relative-relocs", false)) {
config->relrGlibc = true;
config->relrPackDynRelocs = true;
} else {
std::tie(config->androidPackDynRelocs, config->relrPackDynRelocs) =
getPackDynRelocs(args);
}
if (auto *arg = args.getLastArg(OPT_symbol_ordering_file)){
if (args.hasArg(OPT_call_graph_ordering_file))
error("--symbol-ordering-file and --call-graph-order-file "
"may not be used together");
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue())){
config->symbolOrderingFile = getSymbolOrderingFile(*buffer);
// We also need to disable call graph profile sorting to prevent
// LLD from reordering symbols based on CGProfile data.
config->callGraphProfileSort = false;
}
}
assert(config->versionDefinitions.empty());
config->versionDefinitions.push_back(
{"local", (uint16_t)VER_NDX_LOCAL, {}, {}});
config->versionDefinitions.push_back(
{"global", (uint16_t)VER_NDX_GLOBAL, {}, {}});
// If --retain-symbol-file is used, we'll keep only the symbols listed in
// the file and discard all others.
if (auto *arg = args.getLastArg(OPT_retain_symbols_file)) {
config->versionDefinitions[VER_NDX_LOCAL].nonLocalPatterns.push_back(
{"*", /*isExternCpp=*/false, /*hasWildcard=*/true});
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
for (StringRef s : args::getLines(*buffer))
config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(
{s, /*isExternCpp=*/false, /*hasWildcard=*/false});
}
for (opt::Arg *arg : args.filtered(OPT_warn_backrefs_exclude)) {
StringRef pattern(arg->getValue());
if (Expected<GlobPattern> pat = GlobPattern::create(pattern))
config->warnBackrefsExclude.push_back(std::move(*pat));
else
error(arg->getSpelling() + ": " + toString(pat.takeError()));
}
// For -no-pie and -pie, --export-dynamic-symbol specifies defined symbols
// which should be exported. For -shared, references to matched non-local
// STV_DEFAULT symbols are not bound to definitions within the shared object,
// even if other options express a symbolic intention: -Bsymbolic,
// -Bsymbolic-functions (if STT_FUNC), --dynamic-list.
for (auto *arg : args.filtered(OPT_export_dynamic_symbol))
config->dynamicList.push_back(
{arg->getValue(), /*isExternCpp=*/false,
/*hasWildcard=*/hasWildcard(arg->getValue())});
// --export-dynamic-symbol-list specifies a list of --export-dynamic-symbol
// patterns. --dynamic-list is --export-dynamic-symbol-list plus
// -Bsymbolic-like semantics.
config->symbolic =
config->bsymbolic == BsymbolicKind::All || args.hasArg(OPT_dynamic_list);
for (auto *arg :
args.filtered(OPT_dynamic_list, OPT_export_dynamic_symbol_list))
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
readDynamicList(*buffer);
for (auto *arg : args.filtered(OPT_version_script))
if (Optional<std::string> path = searchScript(arg->getValue())) {
if (Optional<MemoryBufferRef> buffer = readFile(*path))
readVersionScript(*buffer);
} else {
error(Twine("cannot find version script ") + arg->getValue());
}
}
// Some Config members do not directly correspond to any particular
// command line option, but are computed from other Config values.
// This function initializes such members. See Config.h for the details
// of these values.
static void setConfigs(opt::InputArgList &args) {
ELFKind k = config->ekind;
uint16_t m = config->emachine;
config->copyRelocs = (config->relocatable || config->emitRelocs);
config->is64 = (k == ELF64LEKind || k == ELF64BEKind);
config->isLE = (k == ELF32LEKind || k == ELF64LEKind);
config->endianness = config->isLE ? endianness::little : endianness::big;
config->isMips64EL = (k == ELF64LEKind && m == EM_MIPS);
config->isPic = config->pie || config->shared;
config->picThunk = args.hasArg(OPT_pic_veneer, config->isPic);
config->wordsize = config->is64 ? 8 : 4;
// ELF defines two different ways to store relocation addends as shown below:
//
// Rel: Addends are stored to the location where relocations are applied. It
// cannot pack the full range of addend values for all relocation types, but
// this only affects relocation types that we don't support emitting as
// dynamic relocations (see getDynRel).
// Rela: Addends are stored as part of relocation entry.
//
// In other words, Rela makes it easy to read addends at the price of an
// extra 4 or 8 bytes per relocation entry.
//
// We pick the format for dynamic relocations according to the psABI for each
// processor, but a contrary choice can be made if the dynamic loader
// supports it.
config->isRela = getIsRela(args);
// If the output uses REL relocations we must store the dynamic relocation
// addends to the output sections. We also store addends for RELA relocations
// if --apply-dynamic-relocs is used.
// We default to not writing the addends when using RELA relocations since
// any standard-conforming tool can find them in r_addend.
config->writeAddends = args.hasFlag(OPT_apply_dynamic_relocs,
OPT_no_apply_dynamic_relocs, false) ||
!config->isRela;
// Validation of dynamic relocation addends is on by default for assertions
// builds (for supported targets) and disabled otherwise. Ideally we would
// enable the debug checks for all targets, but currently not all targets
// have support for reading Elf_Rel addends, so we only enable for a subset.
#ifndef NDEBUG
bool checkDynamicRelocsDefault = m == EM_ARM || m == EM_386 || m == EM_MIPS ||
m == EM_X86_64 || m == EM_RISCV;
#else
bool checkDynamicRelocsDefault = false;
#endif
config->checkDynamicRelocs =
args.hasFlag(OPT_check_dynamic_relocations,
OPT_no_check_dynamic_relocations, checkDynamicRelocsDefault);
config->tocOptimize =
args.hasFlag(OPT_toc_optimize, OPT_no_toc_optimize, m == EM_PPC64);
config->pcRelOptimize =
args.hasFlag(OPT_pcrel_optimize, OPT_no_pcrel_optimize, m == EM_PPC64);
}
static bool isFormatBinary(StringRef s) {
if (s == "binary")
return true;
if (s == "elf" || s == "default")
return false;
error("unknown --format value: " + s +
" (supported formats: elf, default, binary)");
return false;
}
void LinkerDriver::createFiles(opt::InputArgList &args) {
llvm::TimeTraceScope timeScope("Load input files");
// For --{push,pop}-state.
std::vector<std::tuple<bool, bool, bool>> stack;
// Iterate over argv to process input files and positional arguments.
InputFile::isInGroup = false;
bool hasInput = false;
for (auto *arg : args) {
switch (arg->getOption().getID()) {
case OPT_library:
addLibrary(arg->getValue());
hasInput = true;
break;
case OPT_INPUT:
addFile(arg->getValue(), /*withLOption=*/false);
hasInput = true;
break;
case OPT_defsym: {
StringRef from;
StringRef to;
std::tie(from, to) = StringRef(arg->getValue()).split('=');
if (from.empty() || to.empty())
error("--defsym: syntax error: " + StringRef(arg->getValue()));
else
readDefsym(from, MemoryBufferRef(to, "--defsym"));
break;
}
case OPT_script:
if (Optional<std::string> path = searchScript(arg->getValue())) {
if (Optional<MemoryBufferRef> mb = readFile(*path))
readLinkerScript(*mb);
break;
}
error(Twine("cannot find linker script ") + arg->getValue());
break;
case OPT_as_needed:
config->asNeeded = true;
break;
case OPT_format:
config->formatBinary = isFormatBinary(arg->getValue());
break;
case OPT_no_as_needed:
config->asNeeded = false;
break;
case OPT_Bstatic:
case OPT_omagic:
case OPT_nmagic:
config->isStatic = true;
break;
case OPT_Bdynamic:
config->isStatic = false;
break;
case OPT_whole_archive:
inWholeArchive = true;
break;
case OPT_no_whole_archive:
inWholeArchive = false;
break;
case OPT_just_symbols:
if (Optional<MemoryBufferRef> mb = readFile(arg->getValue())) {
files.push_back(createObjFile(*mb));
files.back()->justSymbols = true;
}
break;
case OPT_start_group:
if (InputFile::isInGroup)
error("nested --start-group");
InputFile::isInGroup = true;
break;
case OPT_end_group:
if (!InputFile::isInGroup)
error("stray --end-group");
InputFile::isInGroup = false;
++InputFile::nextGroupId;
break;
case OPT_start_lib:
if (inLib)
error("nested --start-lib");
if (InputFile::isInGroup)
error("may not nest --start-lib in --start-group");
inLib = true;
InputFile::isInGroup = true;
break;
case OPT_end_lib:
if (!inLib)
error("stray --end-lib");
inLib = false;
InputFile::isInGroup = false;
++InputFile::nextGroupId;
break;
case OPT_push_state:
stack.emplace_back(config->asNeeded, config->isStatic, inWholeArchive);
break;
case OPT_pop_state:
if (stack.empty()) {
error("unbalanced --push-state/--pop-state");
break;
}
std::tie(config->asNeeded, config->isStatic, inWholeArchive) = stack.back();
stack.pop_back();
break;
}
}
if (files.empty() && !hasInput && errorCount() == 0)
error("no input files");
}
// If -m <machine_type> was not given, infer it from object files.
void LinkerDriver::inferMachineType() {
if (config->ekind != ELFNoneKind)
return;
for (InputFile *f : files) {
if (f->ekind == ELFNoneKind)
continue;
config->ekind = f->ekind;
config->emachine = f->emachine;
config->osabi = f->osabi;
config->mipsN32Abi = config->emachine == EM_MIPS && isMipsN32Abi(f);
return;
}
error("target emulation unknown: -m or at least one .o file required");
}
// Parse -z max-page-size=<value>. The default value is defined by
// each target.
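// For example, -z max-page-size=0x10000 (an illustrative value) sets it to
// 64 KiB; the value must be a power of 2.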
static uint64_t getMaxPageSize(opt::InputArgList &args) {
uint64_t val = args::getZOptionValue(args, OPT_z, "max-page-size",
target->defaultMaxPageSize);
if (!isPowerOf2_64(val)) {
error("max-page-size: value isn't a power of 2");
return target->defaultMaxPageSize;
}
if (config->nmagic || config->omagic) {
if (val != target->defaultMaxPageSize)
warn("-z max-page-size set, but paging disabled by omagic or nmagic");
return 1;
}
return val;
}
// Parse -z common-page-size=<value>. The default value is defined by
// each target.
static uint64_t getCommonPageSize(opt::InputArgList &args) {
uint64_t val = args::getZOptionValue(args, OPT_z, "common-page-size",
target->defaultCommonPageSize);
if (!isPowerOf2_64(val)) {
error("common-page-size: value isn't a power of 2");
return target->defaultCommonPageSize;
}
if (config->nmagic || config->omagic) {
if (val != target->defaultCommonPageSize)
warn("-z common-page-size set, but paging disabled by omagic or nmagic");
return 1;
}
// commonPageSize can't be larger than maxPageSize.
if (val > config->maxPageSize)
val = config->maxPageSize;
return val;
}
// Parses --image-base option.
static Optional<uint64_t> getImageBase(opt::InputArgList &args) {
// Because we are using config->maxPageSize here, this function has to be
// called after the variable is initialized.
auto *arg = args.getLastArg(OPT_image_base);
if (!arg)
return None;
StringRef s = arg->getValue();
uint64_t v;
if (!to_integer(s, v)) {
error("--image-base: number expected, but got " + s);
return 0;
}
if ((v % config->maxPageSize) != 0)
warn("--image-base: address isn't multiple of page size: " + s);
return v;
}
// Parses `--exclude-libs=lib,lib,...`.
// The library names may be delimited by commas or colons.
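// For example, --exclude-libs=libfoo.a:libbar.a (the archive names are
// illustrative) parses into the set {libfoo.a, libbar.a}.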
static DenseSet<StringRef> getExcludeLibs(opt::InputArgList &args) {
DenseSet<StringRef> ret;
for (auto *arg : args.filtered(OPT_exclude_libs)) {
StringRef s = arg->getValue();
for (;;) {
size_t pos = s.find_first_of(",:");
if (pos == StringRef::npos)
break;
ret.insert(s.substr(0, pos));
s = s.substr(pos + 1);
}
ret.insert(s);
}
return ret;
}
// Handles the --exclude-libs option. If a static library file is specified
// by the --exclude-libs option, all public symbols from the archive become
// private unless otherwise specified by version scripts or similar means.
// A special library name "ALL" means all archive files.
//
// This is not a popular option, but some programs such as bionic libc use it.
static void excludeLibs(opt::InputArgList &args) {
DenseSet<StringRef> libs = getExcludeLibs(args);
bool all = libs.count("ALL");
auto visit = [&](InputFile *file) {
if (file->archiveName.empty() ||
!(all || libs.count(path::filename(file->archiveName))))
return;
ArrayRef<Symbol *> symbols = file->getSymbols();
if (isa<ELFFileBase>(file))
symbols = cast<ELFFileBase>(file)->getGlobalSymbols();
for (Symbol *sym : symbols)
if (!sym->isUndefined() && sym->file == file)
sym->versionId = VER_NDX_LOCAL;
};
for (ELFFileBase *file : ctx->objectFiles)
visit(file);
for (BitcodeFile *file : ctx->bitcodeFiles)
visit(file);
}
// Force sym to be entered in the output.
static void handleUndefined(Symbol *sym, const char *option) {
// Since a symbol may not be used inside the program, LTO may
// eliminate it. Mark the symbol as "used" to prevent it.
sym->isUsedInRegularObj = true;
if (!sym->isLazy())
return;
sym->extract();
if (!config->whyExtract.empty())
ctx->whyExtractRecords.emplace_back(option, sym->file, *sym);
}
// As an extension to GNU linkers, lld supports a variant of `-u`
// which accepts wildcard patterns. All symbols that match a given
// pattern are handled as if they were given by `-u`.
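// For example, --undefined-glob='foo*' (the pattern is illustrative) acts
// like -u for every known symbol whose name starts with "foo".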
static void handleUndefinedGlob(StringRef arg) {
Expected<GlobPattern> pat = GlobPattern::create(arg);
if (!pat) {
error("--undefined-glob: " + toString(pat.takeError()));
return;
}
// Calling sym->extract() in the loop is not safe because it may add new
// symbols to the symbol table, invalidating the current iterator.
SmallVector<Symbol *, 0> syms;
for (Symbol *sym : symtab->symbols())
if (!sym->isPlaceholder() && pat->match(sym->getName()))
syms.push_back(sym);
for (Symbol *sym : syms)
handleUndefined(sym, "--undefined-glob");
}
static void handleLibcall(StringRef name) {
Symbol *sym = symtab->find(name);
if (!sym || !sym->isLazy())
return;
MemoryBufferRef mb;
mb = cast<LazyObject>(sym)->file->mb;
if (isBitcode(mb))
sym->extract();
}
static void writeArchiveStats() {
if (config->printArchiveStats.empty())
return;
std::error_code ec;
raw_fd_ostream os(config->printArchiveStats, ec, sys::fs::OF_None);
if (ec) {
error("--print-archive-stats=: cannot open " + config->printArchiveStats +
": " + ec.message());
return;
}
os << "members\textracted\tarchive\n";
SmallVector<StringRef, 0> archives;
DenseMap<CachedHashStringRef, unsigned> all, extracted;
for (ELFFileBase *file : ctx->objectFiles)
if (file->archiveName.size())
++extracted[CachedHashStringRef(file->archiveName)];
for (BitcodeFile *file : ctx->bitcodeFiles)
if (file->archiveName.size())
++extracted[CachedHashStringRef(file->archiveName)];
for (std::pair<StringRef, unsigned> f : driver->archiveFiles) {
unsigned &v = extracted[CachedHashString(f.first)];
os << f.second << '\t' << v << '\t' << f.first << '\n';
// If the archive occurs multiple times, other instances have a count of 0.
v = 0;
}
}
static void writeWhyExtract() {
if (config->whyExtract.empty())
return;
std::error_code ec;
raw_fd_ostream os(config->whyExtract, ec, sys::fs::OF_None);
if (ec) {
error("cannot open --why-extract= file " + config->whyExtract + ": " +
ec.message());
return;
}
os << "reference\textracted\tsymbol\n";
for (auto &entry : ctx->whyExtractRecords) {
os << std::get<0>(entry) << '\t' << toString(std::get<1>(entry)) << '\t'
<< toString(std::get<2>(entry)) << '\n';
}
}
static void reportBackrefs() {
for (auto &ref : ctx->backwardReferences) {
const Symbol &sym = *ref.first;
std::string to = toString(ref.second.second);
// Some libraries have known problems and can cause noise. Filter them out
// with --warn-backrefs-exclude=. The value may look like (for --start-lib)
// *.o or (archive member) *.a(*.o).
bool exclude = false;
for (const llvm::GlobPattern &pat : config->warnBackrefsExclude)
if (pat.match(to)) {
exclude = true;
break;
}
if (!exclude)
warn("backward reference detected: " + sym.getName() + " in " +
toString(ref.second.first) + " refers to " + to);
}
}
// Handle --dependency-file=<path>. If that option is given, lld creates a
// file at the given path with the following contents:
//
// <output-file>: <input-file> ...
//
// <input-file>:
//
// where <output-file> is the pathname of the output file and <input-file>
// ... is a list of pathnames of all input files. The `make` command can
// read a file in the above format and interpret it as dependency info. We
// write phony targets for every <input-file> to avoid an error when that
// file is removed.
//
// This option is useful if you want your final executable to depend on all
// input files including system libraries. Here is why.
//
// When you write a Makefile, you usually write it so that the final
// executable depends on all user-generated object files. Normally, you
// don't make your executable depend on system libraries (such as libc)
// because you don't know their exact paths, even though system libraries
// that are statically linked into your executable are technically part of
// your program. By using the --dependency-file option, you can make lld
// dump dependency info so that you can maintain exact dependencies easily.
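//
// As a sketch, a link like "ld.lld -o foo foo.o bar.o --dependency-file=foo.d"
// (the file names are illustrative) is expected to produce foo.d containing:
//
//   foo: \
//    foo.o \
//    bar.o
//
//   foo.o:
//
//   bar.o: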
static void writeDependencyFile() {
std::error_code ec;
raw_fd_ostream os(config->dependencyFile, ec, sys::fs::OF_None);
if (ec) {
error("cannot open " + config->dependencyFile + ": " + ec.message());
return;
}
// We use the same escape rules as Clang/GCC which are accepted by Make/Ninja:
// * A space is escaped by a backslash which itself must be escaped.
// * A hash sign is escaped by a single backslash.
// * $ is escaped as $$.
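// For example (illustrative paths), "a b.o" is written as "a\ b.o" and
// "gen$.o" as "gen$$.o".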
auto printFilename = [](raw_fd_ostream &os, StringRef filename) {
llvm::SmallString<256> nativePath;
llvm::sys::path::native(filename.str(), nativePath);
llvm::sys::path::remove_dots(nativePath, /*remove_dot_dot=*/true);
for (unsigned i = 0, e = nativePath.size(); i != e; ++i) {
if (nativePath[i] == '#') {
os << '\\';
} else if (nativePath[i] == ' ') {
os << '\\';
unsigned j = i;
while (j > 0 && nativePath[--j] == '\\')
os << '\\';
} else if (nativePath[i] == '$') {
os << '$';
}
os << nativePath[i];
}
};
os << config->outputFile << ":";
for (StringRef path : config->dependencyFiles) {
os << " \\\n ";
printFilename(os, path);
}
os << "\n";
for (StringRef path : config->dependencyFiles) {
os << "\n";
printFilename(os, path);
os << ":\n";
}
}
// Replaces common symbols with defined symbols that reside in .bss sections.
// This function is called after all symbol names are resolved. As a
// result, the passes after symbol resolution won't see any
// symbols of type CommonSymbol.
static void replaceCommonSymbols() {
llvm::TimeTraceScope timeScope("Replace common symbols");
for (ELFFileBase *file : ctx->objectFiles) {
if (!file->hasCommonSyms)
continue;
for (Symbol *sym : file->getGlobalSymbols()) {
auto *s = dyn_cast<CommonSymbol>(sym);
if (!s)
continue;
auto *bss = make<BssSection>("COMMON", s->size, s->alignment);
bss->file = s->file;
inputSections.push_back(bss);
s->replace(Defined{s->file, StringRef(), s->binding, s->stOther, s->type,
/*value=*/0, s->size, bss});
}
}
}
// If all references to a DSO happen to be weak, the DSO is not added to
// DT_NEEDED. If that happens, replace SharedSymbol with Undefined to avoid
// dangling references to an unneeded DSO. Use a weak binding to avoid
// --no-allow-shlib-undefined diagnostics. Similarly, demote lazy symbols.
static void demoteSharedAndLazySymbols() {
llvm::TimeTraceScope timeScope("Demote shared and lazy symbols");
for (Symbol *sym : symtab->symbols()) {
auto *s = dyn_cast<SharedSymbol>(sym);
if (!(s && !cast<SharedFile>(s->file)->isNeeded) && !sym->isLazy())
continue;
bool used = sym->used;
uint8_t binding = sym->isLazy() ? sym->binding : uint8_t(STB_WEAK);
sym->replace(
Undefined{nullptr, sym->getName(), binding, sym->stOther, sym->type});
sym->used = used;
sym->versionId = VER_NDX_GLOBAL;
}
}
// The section referred to by `s` is considered address-significant. Set the
// keepUnique flag on the section if appropriate.
static void markAddrsig(Symbol *s) {
if (auto *d = dyn_cast_or_null<Defined>(s))
if (d->section)
// We don't need to keep text sections unique under --icf=all even if they
// are address-significant.
if (config->icf == ICFLevel::Safe || !(d->section->flags & SHF_EXECINSTR))
d->section->keepUnique = true;
}
// Record sections that define symbols mentioned in --keep-unique <symbol>
// and symbols referred to by address-significance tables. These sections are
// ineligible for ICF.
template <class ELFT>
static void findKeepUniqueSections(opt::InputArgList &args) {
for (auto *arg : args.filtered(OPT_keep_unique)) {
StringRef name = arg->getValue();
auto *d = dyn_cast_or_null<Defined>(symtab->find(name));
if (!d || !d->section) {
warn("could not find symbol " + name + " to keep unique");
continue;
}
d->section->keepUnique = true;
}
// --icf=all --ignore-data-address-equality means that we can ignore
// the dynsym and address-significance tables entirely.
if (config->icf == ICFLevel::All && config->ignoreDataAddressEquality)
return;
// Symbols in the dynsym could be address-significant in other executables
// or DSOs, so we conservatively mark them as address-significant.
for (Symbol *sym : symtab->symbols())
if (sym->includeInDynsym())
markAddrsig(sym);
// Visit the address-significance table in each object file and mark each
// referenced symbol as address-significant.
for (InputFile *f : ctx->objectFiles) {
auto *obj = cast<ObjFile<ELFT>>(f);
ArrayRef<Symbol *> syms = obj->getSymbols();
if (obj->addrsigSec) {
ArrayRef<uint8_t> contents =
check(obj->getObj().getSectionContents(*obj->addrsigSec));
const uint8_t *cur = contents.begin();
while (cur != contents.end()) {
unsigned size;
const char *err;
uint64_t symIndex = decodeULEB128(cur, &size, contents.end(), &err);
if (err)
fatal(toString(f) + ": could not decode addrsig section: " + err);
markAddrsig(syms[symIndex]);
cur += size;
}
} else {
// If an object file does not have an address-significance table,
// conservatively mark all of its symbols as address-significant.
for (Symbol *s : syms)
markAddrsig(s);
}
}
}
// This function reads a symbol partition specification section. These sections
// are used to control which partition a symbol is allocated to. See
// https://lld.llvm.org/Partitions.html for more details on partitions.
template <typename ELFT>
static void readSymbolPartitionSection(InputSectionBase *s) {
// Read the relocation that refers to the partition's entry point symbol.
Symbol *sym;
const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>();
if (rels.areRelocsRel())
sym = &s->getFile<ELFT>()->getRelocTargetSym(rels.rels[0]);
else
sym = &s->getFile<ELFT>()->getRelocTargetSym(rels.relas[0]);
if (!isa<Defined>(sym) || !sym->includeInDynsym())
return;
StringRef partName = reinterpret_cast<const char *>(s->rawData.data());
for (Partition &part : partitions) {
if (part.name == partName) {
sym->partition = part.getNumber();
return;
}
}
// Forbid partitions from being used on incompatible targets, and forbid them
// from being used together with various linker features that assume a single
// set of output sections.
if (script->hasSectionsCommand)
error(toString(s->file) +
": partitions cannot be used with the SECTIONS command");
if (script->hasPhdrsCommands())
error(toString(s->file) +
": partitions cannot be used with the PHDRS command");
if (!config->sectionStartMap.empty())
error(toString(s->file) + ": partitions cannot be used with "
"--section-start, -Ttext, -Tdata or -Tbss");
if (config->emachine == EM_MIPS)
error(toString(s->file) + ": partitions cannot be used on this target");
// Impose a limit of no more than 254 partitions. This limit comes from the
// sizes of the Partition fields in InputSectionBase and Symbol, as well as
// the amount of space devoted to the partition number in RankFlags.
if (partitions.size() == 254)
fatal("may not have more than 254 partitions");
partitions.emplace_back();
Partition &newPart = partitions.back();
newPart.name = partName;
sym->partition = newPart.getNumber();
}
static Symbol *addUnusedUndefined(StringRef name,
uint8_t binding = STB_GLOBAL) {
return symtab->addSymbol(Undefined{nullptr, name, binding, STV_DEFAULT, 0});
}
static void markBuffersAsDontNeed(bool skipLinkedOutput) {
// With --thinlto-index-only, all buffers are nearly unused from now on
// (except symbol/section names used by infrequent passes). Mark input file
// buffers as MADV_DONTNEED so that these pages can be reused by the expensive
// thin link, saving memory.
if (skipLinkedOutput) {
for (MemoryBuffer &mb : llvm::make_pointee_range(ctx->memoryBuffers))
mb.dontNeedIfMmap();
return;
}
// Otherwise, just mark MemoryBuffers backing BitcodeFiles.
DenseSet<const char *> bufs;
for (BitcodeFile *file : ctx->bitcodeFiles)
bufs.insert(file->mb.getBufferStart());
for (BitcodeFile *file : ctx->lazyBitcodeFiles)
bufs.insert(file->mb.getBufferStart());
for (MemoryBuffer &mb : llvm::make_pointee_range(ctx->memoryBuffers))
if (bufs.count(mb.getBufferStart()))
mb.dontNeedIfMmap();
}
// This function is where link-time optimization (LTO) takes place. When
// LTO is in use, some input files are not in native object file format
// but in the LLVM bitcode format. This function compiles bitcode files
// into a few big native files using LLVM functions and replaces bitcode
// symbols with the results. Because all bitcode files that the program
// consists of are passed to the compiler at once, it can do
// whole-program optimization.
template <class ELFT>
void LinkerDriver::compileBitcodeFiles(bool skipLinkedOutput) {
llvm::TimeTraceScope timeScope("LTO");
// Compile bitcode files and replace bitcode symbols.
lto.reset(new BitcodeCompiler);
for (BitcodeFile *file : ctx->bitcodeFiles)
lto->add(*file);
if (!ctx->bitcodeFiles.empty())
markBuffersAsDontNeed(skipLinkedOutput);
for (InputFile *file : lto->compile()) {
auto *obj = cast<ObjFile<ELFT>>(file);
obj->parse(/*ignoreComdats=*/true);
// Parse '@' in symbol names for non-relocatable output.
if (!config->relocatable)
for (Symbol *sym : obj->getGlobalSymbols())
if (sym->hasVersionSuffix)
sym->parseSymbolVersion();
ctx->objectFiles.push_back(obj);
}
}
// The --wrap option is a feature to rename symbols so that you can write
// wrappers for existing functions. If you pass `--wrap=foo`, all
// occurrences of symbol `foo` are resolved to `__wrap_foo` (so you are
// expected to write a `__wrap_foo` function as a wrapper). The original
// symbol becomes accessible as `__real_foo`, so you can call it from your
// wrapper.
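//
// For example, with --wrap=malloc a user may define a wrapper like this
// (an illustrative sketch):
//
//   void *__wrap_malloc(size_t size) {
//     fprintf(stderr, "allocating %zu bytes\n", size);
//     return __real_malloc(size);
//   }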
//
// This data structure is instantiated for each --wrap option.
struct WrappedSymbol {
Symbol *sym;
Symbol *real;
Symbol *wrap;
};
// Handles --wrap option.
//
// This function instantiates wrapper symbols. At this point, they seem
// like they are not being used at all, so we explicitly set some flags so
// that LTO won't eliminate them.
static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
std::vector<WrappedSymbol> v;
DenseSet<StringRef> seen;
for (auto *arg : args.filtered(OPT_wrap)) {
StringRef name = arg->getValue();
if (!seen.insert(name).second)
continue;
Symbol *sym = symtab->find(name);
if (!sym)
continue;
Symbol *real = addUnusedUndefined(saver().save("__real_" + name));
Symbol *wrap =
addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
v.push_back({sym, real, wrap});
// We want to tell LTO not to inline symbols that are going to be
// overwritten, because LTO doesn't know the final symbol contents after
// renaming.
real->scriptDefined = true;
sym->scriptDefined = true;
// If a symbol is referenced in any object file, bitcode file or shared
// object, mark its redirection target (foo for __real_foo and __wrap_foo
// for foo) as referenced after redirection, which will be used to tell LTO
// to not eliminate the redirection target. If the object file defining the
// symbol also references it, we cannot easily distinguish the case from
// cases where the symbol is not referenced. Retain the redirection target
// in this case because we choose to wrap symbol references regardless of
// whether the symbol is defined
// (https://sourceware.org/bugzilla/show_bug.cgi?id=26358).
if (real->referenced || real->isDefined())
sym->referencedAfterWrap = true;
if (sym->referenced || sym->isDefined())
wrap->referencedAfterWrap = true;
}
return v;
}
// Do renaming for --wrap and foo@v1 by updating pointers to symbols.
//
// When this function is executed, only InputFiles and symbol table
// contain pointers to symbol objects. We visit them to replace pointers,
// so that wrapped symbols are swapped as instructed by the command line.
static void redirectSymbols(ArrayRef<WrappedSymbol> wrapped) {
llvm::TimeTraceScope timeScope("Redirect symbols");
DenseMap<Symbol *, Symbol *> map;
for (const WrappedSymbol &w : wrapped) {
map[w.sym] = w.wrap;
map[w.real] = w.sym;
}
for (Symbol *sym : symtab->symbols()) {
// Enumerate symbols with a non-default version (foo@v1). hasVersionSuffix
// filters out most symbols but is not sufficient.
if (!sym->hasVersionSuffix)
continue;
const char *suffix1 = sym->getVersionSuffix();
if (suffix1[0] != '@' || suffix1[1] == '@')
continue;
// Check the existing symbol foo. We have two special cases to handle:
//
// * There is a definition of foo@v1 and foo@@v1.
// * There is a definition of foo@v1 and foo.
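//
// Illustrative example (GNU as syntax, for illustration only): if `foo` is
// defined and the file also contains `.symver foo, foo@v1`, the assembler
// emits both `foo` and `foo@v1`; similarly, separate `.symver` directives may
// produce both `foo@v1` and the default-version `foo@@v1`. Both shapes are
// normalized below.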
Defined *sym2 = dyn_cast_or_null<Defined>(symtab->find(sym->getName()));
if (!sym2)
continue;
const char *suffix2 = sym2->getVersionSuffix();
if (suffix2[0] == '@' && suffix2[1] == '@' &&
strcmp(suffix1 + 1, suffix2 + 2) == 0) {
// foo@v1 and foo@@v1 should be merged, so redirect foo@v1 to foo@@v1.
map.try_emplace(sym, sym2);
// If both foo@v1 and foo@@v1 are defined and non-weak, report a duplicate
// definition error.
if (sym->isDefined())
sym2->checkDuplicate(cast<Defined>(*sym));
sym2->resolve(*sym);
// Eliminate foo@v1 from the symbol table.
sym->symbolKind = Symbol::PlaceholderKind;
sym->isUsedInRegularObj = false;
} else if (auto *sym1 = dyn_cast<Defined>(sym)) {
if (sym2->versionId > VER_NDX_GLOBAL
? config->versionDefinitions[sym2->versionId].name == suffix1 + 1
: sym1->section == sym2->section && sym1->value == sym2->value) {
// Due to an assembler design flaw, if foo is defined, .symver foo,
// foo@v1 defines both foo and foo@v1. Unless foo is bound to a
// different version, GNU ld makes foo@v1 canonical and eliminates foo.
// Emulate its behavior; otherwise we would have foo or foo@@v1 beside
// foo@v1. foo@v1 and foo combining does not apply if they are not
// defined in the same place.
map.try_emplace(sym2, sym);
sym2->symbolKind = Symbol::PlaceholderKind;
sym2->isUsedInRegularObj = false;
}
}
}
if (map.empty())
return;
// Update pointers in input files.
parallelForEach(ctx->objectFiles, [&](ELFFileBase *file) {
for (Symbol *&sym : file->getMutableGlobalSymbols())
if (Symbol *s = map.lookup(sym))
sym = s;
});
// Update pointers in the symbol table.
for (const WrappedSymbol &w : wrapped)
symtab->wrap(w.sym, w.real, w.wrap);
}
static void checkAndReportMissingFeature(StringRef config, uint32_t features,
uint32_t mask, const Twine &report) {
if (!(features & mask)) {
if (config == "error")
error(report);
else if (config == "warning")
warn(report);
}
}
// To enable CET (x86's hardware-assisted control flow enforcement), each
// source file must be compiled with -fcf-protection. Object files compiled
// with the flag contain feature flags indicating that they are compatible
// with CET. We enable the feature only when all object files are compatible
// with CET.
//
// This is also the case with AArch64's BTI and PAC, which use a similar
// GNU_PROPERTY_AARCH64_FEATURE_1_AND mechanism.
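//
// Illustrative usage (assumption: typical toolchain flags): objects built with
// `clang -fcf-protection=full` carry the x86 IBT/SHSTK property bits, and
// objects built with `clang -mbranch-protection=bti+pac-ret` carry the AArch64
// BTI/PAC bits; `-z cet-report=...`, `-z bti-report=...`, `-z force-bti` and
// `-z force-ibt` control how missing bits are handled below.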
static uint32_t getAndFeatures() {
if (config->emachine != EM_386 && config->emachine != EM_X86_64 &&
config->emachine != EM_AARCH64)
return 0;
uint32_t ret = -1;
for (ELFFileBase *f : ctx->objectFiles) {
uint32_t features = f->andFeatures;
checkAndReportMissingFeature(
config->zBtiReport, features, GNU_PROPERTY_AARCH64_FEATURE_1_BTI,
toString(f) + ": -z bti-report: file does not have "
"GNU_PROPERTY_AARCH64_FEATURE_1_BTI property");
checkAndReportMissingFeature(
config->zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_IBT,
toString(f) + ": -z cet-report: file does not have "
"GNU_PROPERTY_X86_FEATURE_1_IBT property");
checkAndReportMissingFeature(
config->zCetReport, features, GNU_PROPERTY_X86_FEATURE_1_SHSTK,
toString(f) + ": -z cet-report: file does not have "
"GNU_PROPERTY_X86_FEATURE_1_SHSTK property");
if (config->zForceBti && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) {
features |= GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
if (config->zBtiReport == "none")
warn(toString(f) + ": -z force-bti: file does not have "
"GNU_PROPERTY_AARCH64_FEATURE_1_BTI property");
} else if (config->zForceIbt &&
!(features & GNU_PROPERTY_X86_FEATURE_1_IBT)) {
if (config->zCetReport == "none")
warn(toString(f) + ": -z force-ibt: file does not have "
"GNU_PROPERTY_X86_FEATURE_1_IBT property");
features |= GNU_PROPERTY_X86_FEATURE_1_IBT;
}
if (config->zPacPlt && !(features & GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) {
warn(toString(f) + ": -z pac-plt: file does not have "
"GNU_PROPERTY_AARCH64_FEATURE_1_PAC property");
features |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC;
}
ret &= features;
}
// Force enable Shadow Stack.
if (config->zShstk)
ret |= GNU_PROPERTY_X86_FEATURE_1_SHSTK;
return ret;
}
static void initializeLocalSymbols(ELFFileBase *file) {
switch (config->ekind) {
case ELF32LEKind:
cast<ObjFile<ELF32LE>>(file)->initializeLocalSymbols();
break;
case ELF32BEKind:
cast<ObjFile<ELF32BE>>(file)->initializeLocalSymbols();
break;
case ELF64LEKind:
cast<ObjFile<ELF64LE>>(file)->initializeLocalSymbols();
break;
case ELF64BEKind:
cast<ObjFile<ELF64BE>>(file)->initializeLocalSymbols();
break;
default:
llvm_unreachable("");
}
}
static void postParseObjectFile(ELFFileBase *file) {
switch (config->ekind) {
case ELF32LEKind:
cast<ObjFile<ELF32LE>>(file)->postParse();
break;
case ELF32BEKind:
cast<ObjFile<ELF32BE>>(file)->postParse();
break;
case ELF64LEKind:
cast<ObjFile<ELF64LE>>(file)->postParse();
break;
case ELF64BEKind:
cast<ObjFile<ELF64BE>>(file)->postParse();
break;
default:
llvm_unreachable("");
}
}
// Do actual linking. Note that when this function is called,
// all linker scripts have already been parsed.
void LinkerDriver::link(opt::InputArgList &args) {
llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link"));
// If a --hash-style option was not given, set it to a default value,
// which varies depending on the target.
if (!args.hasArg(OPT_hash_style)) {
if (config->emachine == EM_MIPS)
config->sysvHash = true;
else
config->sysvHash = config->gnuHash = true;
}
// The default output filename is "a.out", per Unix tradition.
if (config->outputFile.empty())
config->outputFile = "a.out";
// Fail early if the output file or map file is not writable. If a user has a
// long link, e.g. due to a large LTO link, they do not want to run it only to
// find that it failed because of a mistake in their command line.
{
llvm::TimeTraceScope timeScope("Create output files");
if (auto e = tryCreateFile(config->outputFile))
error("cannot open output file " + config->outputFile + ": " +
e.message());
if (auto e = tryCreateFile(config->mapFile))
error("cannot open map file " + config->mapFile + ": " + e.message());
if (auto e = tryCreateFile(config->whyExtract))
error("cannot open --why-extract= file " + config->whyExtract + ": " +
e.message());
}
if (errorCount())
return;
// Use the default entry point name if no name was given via the command
// line or linker scripts. For some reason, the MIPS entry point name is
// different from the others.
config->warnMissingEntry =
(!config->entry.empty() || (!config->shared && !config->relocatable));
if (config->entry.empty() && !config->relocatable)
config->entry = (config->emachine == EM_MIPS) ? "__start" : "_start";
// Handle --trace-symbol.
for (auto *arg : args.filtered(OPT_trace_symbol))
symtab->insert(arg->getValue())->traced = true;
// Handle -u/--undefined before input files. If both a.a and b.so define foo,
// -u foo a.a b.so will extract a.a.
for (StringRef name : config->undefined)
addUnusedUndefined(name)->referenced = true;
// Add all files to the symbol table. This will add almost all
// symbols that we need to the symbol table. This process might add more
// files to the link via autolinking; such files are always appended to the
// Files vector.
{
llvm::TimeTraceScope timeScope("Parse input files");
for (size_t i = 0; i < files.size(); ++i) {
llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName());
parseFile(files[i]);
}
}
// Now that we have every file, we can decide if we will need a
// dynamic symbol table.
// We need one if we were asked to export dynamic symbols or if we are
// producing a shared library.
// We also need one if any shared libraries are used and for pie executables
// (probably because the dynamic linker needs it).
config->hasDynSymTab =
!ctx->sharedFiles.empty() || config->isPic || config->exportDynamic;
// Some symbols (such as __ehdr_start) are defined lazily only when there
// are undefined symbols for them, so we add these to trigger that logic.
for (StringRef name : script->referencedSymbols) {
Symbol *sym = addUnusedUndefined(name);
sym->isUsedInRegularObj = true;
sym->referenced = true;
}
// Prevent LTO from removing any definition referenced by -u.
for (StringRef name : config->undefined)
if (Defined *sym = dyn_cast_or_null<Defined>(symtab->find(name)))
sym->isUsedInRegularObj = true;
// If an entry symbol is in a static archive, pull out that file now.
if (Symbol *sym = symtab->find(config->entry))
handleUndefined(sym, "--entry");
// Handle the `--undefined-glob <pattern>` options.
for (StringRef pat : args::getStrings(args, OPT_undefined_glob))
handleUndefinedGlob(pat);
// Mark -init and -fini symbols so that LTO doesn't eliminate them.
if (Symbol *sym = dyn_cast_or_null<Defined>(symtab->find(config->init)))
sym->isUsedInRegularObj = true;
if (Symbol *sym = dyn_cast_or_null<Defined>(symtab->find(config->fini)))
sym->isUsedInRegularObj = true;
// If any of our inputs are bitcode files, the LTO code generator may create
// references to certain library functions that might not be explicit in the
// bitcode file's symbol table. If any of those library functions are defined
// in a bitcode file in an archive member, we need to arrange to use LTO to
// compile those archive members by adding them to the link beforehand.
//
// However, adding all libcall symbols to the link can have undesired
// consequences. For example, the libgcc implementation of
// __sync_val_compare_and_swap_8 on 32-bit ARM pulls in an .init_array entry
// that aborts the program if the Linux kernel does not support 64-bit
// atomics, which would prevent the program from running even if it does not
// use 64-bit atomics.
//
// Therefore, we only add libcall symbols to the link before LTO if we have
// to, i.e. if the symbol's definition is in bitcode. Any other required
// libcall symbols will be added to the link after LTO when we add the LTO
// object file to the link.
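//
// For example (illustrative): LTO may lower a large struct copy into a call
// to memcpy; if memcpy is only defined in a bitcode archive member, that
// member must be added to the link now so it participates in LTO.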
if (!ctx->bitcodeFiles.empty())
for (auto *s : lto::LTO::getRuntimeLibcallSymbols())
handleLibcall(s);
// Archive members defining __wrap symbols may be extracted.
std::vector<WrappedSymbol> wrapped = addWrappedSymbols(args);
// No more lazy bitcode can be extracted at this point. Do post parse work
// like checking duplicate symbols.
parallelForEach(ctx->objectFiles, initializeLocalSymbols);
parallelForEach(ctx->objectFiles, postParseObjectFile);
parallelForEach(ctx->bitcodeFiles,
[](BitcodeFile *file) { file->postParse(); });
for (auto &it : ctx->nonPrevailingSyms) {
Symbol &sym = *it.first;
sym.replace(Undefined{sym.file, sym.getName(), sym.binding, sym.stOther,
sym.type, it.second});
cast<Undefined>(sym).nonPrevailing = true;
}
ctx->nonPrevailingSyms.clear();
for (const DuplicateSymbol &d : ctx->duplicates)
reportDuplicate(*d.sym, d.file, d.section, d.value);
ctx->duplicates.clear();
// Return if there were name resolution errors.
if (errorCount())
return;
// We want to declare the linker script's symbols early,
// so that we can version them.
// They also might be exported if referenced by DSOs.
script->declareSymbols();
// Handle --exclude-libs. This is before scanVersionScript() due to a
// workaround for Android ndk: for a defined versioned symbol in an archive
// without a version node in the version script, Android does not expect a
// 'has undefined version' error in -shared --exclude-libs=ALL mode (PR36295).
// GNU ld errors in this case.
if (args.hasArg(OPT_exclude_libs))
excludeLibs(args);
// Create elfHeader early. We need a dummy section in
// addReservedSymbols to mark the created symbols as not absolute.
Out::elfHeader = make<OutputSection>("", 0, SHF_ALLOC);
// We need to create some reserved symbols such as _end. Create them.
if (!config->relocatable)
addReservedSymbols();
// Apply version scripts.
//
// For a relocatable output, version scripts don't make sense, and
// parsing a symbol version string (e.g. dropping "@ver1" from a symbol
// name "foo@ver1") rather do harm, so we don't call this if -r is given.
if (!config->relocatable) {
llvm::TimeTraceScope timeScope("Process symbol versions");
symtab->scanVersionScript();
}
// Skip the normal linked output if some LTO options are specified.
//
// For --thinlto-index-only, index file creation is performed in
// compileBitcodeFiles, so we are done afterwards. --plugin-opt=emit-llvm and
// --plugin-opt=emit-asm create output files in bitcode or assembly code,
// respectively. When only certain ThinLTO modules are specified for
// compilation, the intermediate object files are the expected output.
const bool skipLinkedOutput = config->thinLTOIndexOnly || config->emitLLVM ||
config->ltoEmitAsm ||
!config->thinLTOModulesToCompile.empty();
// Do link-time optimization if given files are LLVM bitcode files.
// This compiles bitcode files into real object files.
//
// With this the symbol table should be complete. After this, no new names
// except a few linker-synthesized ones will be added to the symbol table.
const size_t numObjsBeforeLTO = ctx->objectFiles.size();
invokeELFT(compileBitcodeFiles, skipLinkedOutput);
// Symbol resolution finished. Report backward reference problems,
// --print-archive-stats=, and --why-extract=.
reportBackrefs();
writeArchiveStats();
writeWhyExtract();
if (errorCount())
return;
// Bail out if normal linked output is skipped due to LTO.
if (skipLinkedOutput)
return;
// compileBitcodeFiles may have produced lto.tmp object files. After this, no
// more files will be added.
auto newObjectFiles = makeArrayRef(ctx->objectFiles).slice(numObjsBeforeLTO);
parallelForEach(newObjectFiles, initializeLocalSymbols);
parallelForEach(newObjectFiles, postParseObjectFile);
for (const DuplicateSymbol &d : ctx->duplicates)
reportDuplicate(*d.sym, d.file, d.section, d.value);
// Handle --exclude-libs again because lto.tmp may reference additional
// libcall symbols defined in an excluded archive. This may override
// versionId set by scanVersionScript().
if (args.hasArg(OPT_exclude_libs))
excludeLibs(args);
// Apply symbol renames for --wrap and combine foo@v1 and foo@@v1.
redirectSymbols(wrapped);
// Replace common symbols with regular symbols.
replaceCommonSymbols();
{
llvm::TimeTraceScope timeScope("Aggregate sections");
// Now that we have a complete list of input files, aggregate all input
// sections into one place. Beyond this point, no new files are added.
for (InputFile *f : ctx->objectFiles)
for (InputSectionBase *s : f->getSections())
if (s && s != &InputSection::discarded)
inputSections.push_back(s);
for (BinaryFile *f : ctx->binaryFiles)
for (InputSectionBase *s : f->getSections())
inputSections.push_back(cast<InputSection>(s));
}
{
llvm::TimeTraceScope timeScope("Strip sections");
if (ctx->hasSympart.load(std::memory_order_relaxed)) {
llvm::erase_if(inputSections, [](InputSectionBase *s) {
if (s->type != SHT_LLVM_SYMPART)
return false;
invokeELFT(readSymbolPartitionSection, s);
return true;
});
}
// We do not want to emit debug sections if --strip-all
// or --strip-debug are given.
if (config->strip != StripPolicy::None) {
llvm::erase_if(inputSections, [](InputSectionBase *s) {
if (isDebugSection(*s))
return true;
if (auto *isec = dyn_cast<InputSection>(s))
if (InputSectionBase *rel = isec->getRelocatedSection())
if (isDebugSection(*rel))
return true;
return false;
});
}
}
// Since we now have a complete set of input files, we can create
// a .d file to record build dependencies.
if (!config->dependencyFile.empty())
writeDependencyFile();
// Now that the number of partitions is fixed, save a pointer to the main
// partition.
mainPart = &partitions[0];
// Read .note.gnu.property sections from input object files which
// contain hints to tweak the linker's and loader's behavior.
config->andFeatures = getAndFeatures();
// The Target instance handles target-specific stuff, such as applying
// relocations or writing a PLT section. It also contains target-dependent
// values such as a default image base address.
target = getTarget();
config->eflags = target->calcEFlags();
// maxPageSize (sometimes called abi page size) is the maximum page size that
// the output can be run on. For example if the OS can use 4k or 64k page
// sizes then maxPageSize must be 64k for the output to be usable on both.
// All important alignment decisions must use this value.
config->maxPageSize = getMaxPageSize(args);
// commonPageSize is the most common page size that the output will be run on.
// For example if an OS can use 4k or 64k page sizes and 4k is more common
// than 64k then commonPageSize is set to 4k. commonPageSize can be used for
// optimizations such as DATA_SEGMENT_ALIGN in linker scripts. LLD's use of it
// is limited to writing trap instructions on the last executable segment.
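//
// Illustrative usage (assumption): an output meant to run on both 4k and 64k
// kernels is typically linked with `-z max-page-size=0x10000`, while
// `-z common-page-size=0x1000` keeps alignment padding for the common case
// small.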
config->commonPageSize = getCommonPageSize(args);
config->imageBase = getImageBase(args);
if (config->emachine == EM_ARM) {
// FIXME: These warnings can be removed when lld only uses these features
// when the input objects have been compiled with an architecture that
// supports them.
if (!config->armHasBlx)
warn("lld uses blx instruction, no object with architecture supporting "
"feature detected");
}
// This adds a .comment section containing a version string.
if (!config->relocatable)
inputSections.push_back(createCommentSection());
// Split SHF_MERGE and .eh_frame sections into pieces in preparation for garbage collection.
invokeELFT(splitSections);
// Garbage collection and removal of shared symbols from unused shared objects.
invokeELFT(markLive);
demoteSharedAndLazySymbols();
// Make copies of any input sections that need to be copied into each
// partition.
copySectionsIntoPartitions();
// Create synthesized sections such as .got and .plt. This is called before
// processSectionCommands() so that they can be placed by SECTIONS commands.
invokeELFT(createSyntheticSections);
// Some input sections that are used for exception handling need to be moved
// into synthetic sections. Do that now so that they aren't assigned to
// output sections in the usual way.
if (!config->relocatable)
combineEhSections();
{
llvm::TimeTraceScope timeScope("Assign sections");
// Create output sections described by SECTIONS commands.
script->processSectionCommands();
// Linker scripts control how input sections are assigned to output
// sections. Input sections that were not handled by scripts are called
// "orphans", and they are assigned to output sections by the default rule.
// Process that.
script->addOrphanSections();
}
{
llvm::TimeTraceScope timeScope("Merge/finalize input sections");
// Migrate InputSectionDescription::sectionBases to sections. This includes
// merging MergeInputSections into a single MergeSyntheticSection. From this
// point onwards InputSectionDescription::sections should be used instead of
// sectionBases.
for (SectionCommand *cmd : script->sectionCommands)
if (auto *osd = dyn_cast<OutputDesc>(cmd))
osd->osec.finalizeInputSections();
llvm::erase_if(inputSections, [](InputSectionBase *s) {
return isa<MergeInputSection>(s);
});
}
// Two input sections with different output sections should not be folded.
// ICF runs after processSectionCommands() so that we know the output sections.
if (config->icf != ICFLevel::None) {
invokeELFT(findKeepUniqueSections, args);
invokeELFT(doIcf);
}
// Read the call graph now that we know what was GCed or ICFed.
if (config->callGraphProfileSort) {
if (auto *arg = args.getLastArg(OPT_call_graph_ordering_file))
if (Optional<MemoryBufferRef> buffer = readFile(arg->getValue()))
readCallGraph(*buffer);
invokeELFT(readCallGraphsFromObjectFiles);
}
// Write the result to the file.
invokeELFT(writeResult);
}
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index 927dc272b532..473809b05e9c 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -1,1787 +1,1787 @@
//===- InputFiles.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "Config.h"
#include "DWARF.h"
#include "Driver.h"
#include "InputSection.h"
#include "LinkerScript.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/CommonLinkerContext.h"
#include "lld/Common/DWARF.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/ARMAttributeParser.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/TarWriter.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::ELF;
using namespace llvm::object;
using namespace llvm::sys;
using namespace llvm::sys::fs;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::elf;
bool InputFile::isInGroup;
uint32_t InputFile::nextGroupId;
std::unique_ptr<TarWriter> elf::tar;
// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
std::string lld::toString(const InputFile *f) {
if (!f)
return "<internal>";
if (f->toStringCache.empty()) {
if (f->archiveName.empty())
f->toStringCache = f->getName();
else
(f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache);
}
return std::string(f->toStringCache);
}
static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) {
unsigned char size;
unsigned char endian;
std::tie(size, endian) = getElfArchType(mb.getBuffer());
auto report = [&](StringRef msg) {
StringRef filename = mb.getBufferIdentifier();
if (archiveName.empty())
fatal(filename + ": " + msg);
else
fatal(archiveName + "(" + filename + "): " + msg);
};
if (!mb.getBuffer().startswith(ElfMagic))
report("not an ELF file");
if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
report("corrupted ELF file: invalid data encoding");
if (size != ELFCLASS32 && size != ELFCLASS64)
report("corrupted ELF file: invalid file class");
size_t bufSize = mb.getBuffer().size();
if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) ||
(size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr)))
report("corrupted ELF file: file is too short");
if (size == ELFCLASS32)
return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind;
return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind;
}
InputFile::InputFile(Kind k, MemoryBufferRef m)
: mb(m), groupId(nextGroupId), fileKind(k) {
// All files within the same --{start,end}-group get the same group ID.
// Otherwise, a new file will get a new group ID.
if (!isInGroup)
++nextGroupId;
}
Optional<MemoryBufferRef> elf::readFile(StringRef path) {
llvm::TimeTraceScope timeScope("Load input files", path);
// The --chroot option changes our virtual root directory.
// This is useful when you are dealing with files created by --reproduce.
if (!config->chroot.empty() && path.startswith("/"))
path = saver().save(config->chroot + path);
log(path);
config->dependencyFiles.insert(llvm::CachedHashString(path));
auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false,
/*RequiresNullTerminator=*/false);
if (auto ec = mbOrErr.getError()) {
error("cannot open " + path + ": " + ec.message());
return None;
}
MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef();
ctx->memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership
if (tar)
tar->append(relativeToRoot(path), mbref.getBuffer());
return mbref;
}
// All input object files must be for the same architecture
// (e.g. it does not make sense to link x86 object files with
// MIPS object files.) This function checks for that error.
static bool isCompatible(InputFile *file) {
if (!file->isElf() && !isa<BitcodeFile>(file))
return true;
if (file->ekind == config->ekind && file->emachine == config->emachine) {
if (config->emachine != EM_MIPS)
return true;
if (isMipsN32Abi(file) == config->mipsN32Abi)
return true;
}
StringRef target =
!config->bfdname.empty() ? config->bfdname : config->emulation;
if (!target.empty()) {
error(toString(file) + " is incompatible with " + target);
return false;
}
InputFile *existing = nullptr;
if (!ctx->objectFiles.empty())
existing = ctx->objectFiles[0];
else if (!ctx->sharedFiles.empty())
existing = ctx->sharedFiles[0];
else if (!ctx->bitcodeFiles.empty())
existing = ctx->bitcodeFiles[0];
std::string with;
if (existing)
with = " with " + toString(existing);
error(toString(file) + " is incompatible" + with);
return false;
}
template <class ELFT> static void doParseFile(InputFile *file) {
if (!isCompatible(file))
return;
// Binary file
if (auto *f = dyn_cast<BinaryFile>(file)) {
ctx->binaryFiles.push_back(f);
f->parse();
return;
}
// Lazy object file
if (file->lazy) {
if (auto *f = dyn_cast<BitcodeFile>(file)) {
ctx->lazyBitcodeFiles.push_back(f);
f->parseLazy();
} else {
cast<ObjFile<ELFT>>(file)->parseLazy();
}
return;
}
if (config->trace)
message(toString(file));
// .so file
if (auto *f = dyn_cast<SharedFile>(file)) {
f->parse<ELFT>();
return;
}
// LLVM bitcode file
if (auto *f = dyn_cast<BitcodeFile>(file)) {
ctx->bitcodeFiles.push_back(f);
f->parse<ELFT>();
return;
}
// Regular object file
ctx->objectFiles.push_back(cast<ELFFileBase>(file));
cast<ObjFile<ELFT>>(file)->parse();
}
// Add symbols in File to the symbol table.
void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); }
// Concatenates arguments to construct a string representing an error location.
static std::string createFileLineMsg(StringRef path, unsigned line) {
std::string filename = std::string(path::filename(path));
std::string lineno = ":" + std::to_string(line);
if (filename == path)
return filename + lineno;
return filename + lineno + " (" + path.str() + lineno + ")";
}
template <class ELFT>
static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
InputSectionBase &sec, uint64_t offset) {
// In DWARF, functions and variables are stored in different places.
// First, look up a function for a given offset.
if (Optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
return createFileLineMsg(info->FileName, info->Line);
// If it failed, look up again as a variable.
if (Optional<std::pair<std::string, unsigned>> fileLine =
file.getVariableLoc(sym.getName()))
return createFileLineMsg(fileLine->first, fileLine->second);
// File.sourceFile contains STT_FILE symbol, and that is a last resort.
return std::string(file.sourceFile);
}
std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec,
uint64_t offset) {
if (kind() != ObjKind)
return "";
switch (config->ekind) {
default:
llvm_unreachable("Invalid kind");
case ELF32LEKind:
return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset);
case ELF32BEKind:
return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset);
case ELF64LEKind:
return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset);
case ELF64BEKind:
return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset);
}
}
StringRef InputFile::getNameForScript() const {
if (archiveName.empty())
return getName();
if (nameForScriptCache.empty())
nameForScriptCache = (archiveName + Twine(':') + getName()).str();
return nameForScriptCache;
}
template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() {
llvm::call_once(initDwarf, [this]() {
dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>(
std::make_unique<LLDDwarfObj<ELFT>>(this), "",
[&](Error err) { warn(getName() + ": " + toString(std::move(err))); },
[&](Error warning) {
warn(getName() + ": " + toString(std::move(warning)));
}));
});
return dwarf.get();
}
// Returns the pair of file name and line number describing the location of a
// data object (variable, array, etc.) definition.
template <class ELFT>
Optional<std::pair<std::string, unsigned>>
ObjFile<ELFT>::getVariableLoc(StringRef name) {
return getDwarf()->getVariableLoc(name);
}
// Returns source line information for a given offset
// using DWARF debug info.
template <class ELFT>
Optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s,
uint64_t offset) {
// Detect SectionIndex for specified section.
uint64_t sectionIndex = object::SectionedAddress::UndefSection;
ArrayRef<InputSectionBase *> sections = s->file->getSections();
for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) {
if (s == sections[curIndex]) {
sectionIndex = curIndex;
break;
}
}
return getDwarf()->getDILineInfo(offset, sectionIndex);
}
ELFFileBase::ELFFileBase(Kind k, MemoryBufferRef mb) : InputFile(k, mb) {
ekind = getELFKind(mb, "");
switch (ekind) {
case ELF32LEKind:
init<ELF32LE>();
break;
case ELF32BEKind:
init<ELF32BE>();
break;
case ELF64LEKind:
init<ELF64LE>();
break;
case ELF64BEKind:
init<ELF64BE>();
break;
default:
llvm_unreachable("getELFKind");
}
}
template <typename Elf_Shdr>
static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
for (const Elf_Shdr &sec : sections)
if (sec.sh_type == type)
return &sec;
return nullptr;
}
template <class ELFT> void ELFFileBase::init() {
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Sym = typename ELFT::Sym;
// Initialize trivial attributes.
const ELFFile<ELFT> &obj = getObj<ELFT>();
emachine = obj.getHeader().e_machine;
osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI];
abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION];
ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this);
elfShdrs = sections.data();
numELFShdrs = sections.size();
// Find a symbol table.
bool isDSO =
(identify_magic(mb.getBuffer()) == file_magic::elf_shared_object);
const Elf_Shdr *symtabSec =
findSection(sections, isDSO ? SHT_DYNSYM : SHT_SYMTAB);
if (!symtabSec)
return;
// Initialize members corresponding to a symbol table.
firstGlobal = symtabSec->sh_info;
ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this);
if (firstGlobal == 0 || firstGlobal > eSyms.size())
fatal(toString(this) + ": invalid sh_info in symbol table");
elfSyms = reinterpret_cast<const void *>(eSyms.data());
numELFSyms = uint32_t(eSyms.size());
stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this);
}
template <class ELFT>
uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
return CHECK(
this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
this);
}
template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
object::ELFFile<ELFT> obj = this->getObj();
// Read a section table. justSymbols is usually false.
if (this->justSymbols)
initializeJustSymbols();
else
initializeSections(ignoreComdats, obj);
// Read a symbol table.
initializeSymbols(obj);
}
// Sections with SHT_GROUP and comdat bits define comdat section groups.
// They are identified and deduplicated by group name. This function
// returns a group name.
template <class ELFT>
StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
const Elf_Shdr &sec) {
typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
if (sec.sh_info >= symbols.size())
fatal(toString(this) + ": invalid symbol index");
const typename ELFT::Sym &sym = symbols[sec.sh_info];
return CHECK(sym.getName(this->stringTable), this);
}
template <class ELFT>
bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
// On a regular link we don't merge sections if -O0 (default is -O1). This
// sometimes makes the linker significantly faster, although the output will
// be bigger.
//
// Doing the same for -r would create a problem as it would combine sections
// with different sh_entsize. One option would be to just copy every SHF_MERGE
// section as is to the output. While this would produce a valid ELF file with
// usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
// they see two .debug_str. We could have separate logic for combining
// SHF_MERGE sections based both on their name and sh_entsize, but that seems
// to be more trouble than it is worth. Instead, we just use the regular (-O1)
// logic for -r.
if (config->optimize == 0 && !config->relocatable)
return false;
// A mergeable section with size 0 is useless because it doesn't have
// any data to merge. A mergeable string section with size 0 can be
// argued as invalid because it doesn't end with a null character.
// We'll avoid a mess by handling them as if they were non-mergeable.
if (sec.sh_size == 0)
return false;
// Check for sh_entsize. The ELF spec is not clear about the zero
// sh_entsize. It says that "the member [sh_entsize] contains 0 if
// the section does not hold a table of fixed-size entries". We know
// that Rust 1.13 produces a string mergeable section with a zero
// sh_entsize. Here we just accept it rather than being picky about it.
uint64_t entSize = sec.sh_entsize;
if (entSize == 0)
return false;
if (sec.sh_size % entSize)
fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" +
Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" +
Twine(entSize) + ")");
if (sec.sh_flags & SHF_WRITE)
fatal(toString(this) + ":(" + name +
"): writable SHF_MERGE section is not supported");
return true;
}
// This is for --just-symbols.
//
// --just-symbols is a very minor feature that allows you to link your
// output against another existing program, so that if you load both your
// program and the other program into memory, your output can refer to the
// other program's symbols.
//
// When the option is given, we link "just symbols". The section table is
// initialized with null pointers.
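//
// Illustrative usage (assumption about typical invocation):
//   ld.lld plugin.o --just-symbols=main-exe -o plugin.so
// lets plugin.so refer to symbols at the addresses they have in main-exe,
// without copying any of main-exe's sections.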
template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
sections.resize(numELFShdrs);
}
// An ELF object file may contain a `.deplibs` section. If it exists, the
// section contains a list of library specifiers such as `m` for libm. This
// function resolves a given name by finding the first matching library, checking
// the various ways that a library can be specified to LLD. This ELF extension
// is a form of autolinking and is called `dependent libraries`. It is currently
// unique to LLVM and lld.
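//
// Illustrative example (assumption: typical clang usage): a translation unit
// containing `#pragma comment(lib, "m")` gets a `.deplibs` entry "m", so
// linking that object pulls in libm much as an explicit -lm would.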
static void addDependentLibrary(StringRef specifier, const InputFile *f) {
if (!config->dependentLibraries)
return;
if (Optional<std::string> s = searchLibraryBaseName(specifier))
driver->addFile(saver().save(*s), /*withLOption=*/true);
else if (Optional<std::string> s = findFromSearchPaths(specifier))
driver->addFile(saver().save(*s), /*withLOption=*/true);
else if (fs::exists(specifier))
driver->addFile(specifier, /*withLOption=*/false);
else
error(toString(f) +
": unable to find library from dependent library specifier: " +
specifier);
}
// Record the membership of a section group so that in the garbage collection
// pass, section group members are kept or discarded as a unit.
template <class ELFT>
static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
ArrayRef<typename ELFT::Word> entries) {
bool hasAlloc = false;
for (uint32_t index : entries.slice(1)) {
if (index >= sections.size())
return;
if (InputSectionBase *s = sections[index])
if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
hasAlloc = true;
}
// If any member has the SHF_ALLOC flag, the whole group is subject to garbage
// collection. See the comment in markLive(). This rule retains .debug_types
// and .rela.debug_types.
if (!hasAlloc)
return;
// Connect the members in a circular doubly-linked list via
// nextInSectionGroup.
InputSectionBase *head;
InputSectionBase *prev = nullptr;
for (uint32_t index : entries.slice(1)) {
InputSectionBase *s = sections[index];
if (!s || s == &InputSection::discarded)
continue;
if (prev)
prev->nextInSectionGroup = s;
else
head = s;
prev = s;
}
if (prev)
prev->nextInSectionGroup = head;
}
template <class ELFT>
void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
const llvm::object::ELFFile<ELFT> &obj) {
ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
uint64_t size = objSections.size();
this->sections.resize(size);
std::vector<ArrayRef<Elf_Word>> selectedGroups;
for (size_t i = 0; i != size; ++i) {
if (this->sections[i] == &InputSection::discarded)
continue;
const Elf_Shdr &sec = objSections[i];
// SHF_EXCLUDE'ed sections are discarded by the linker. However,
// if -r is given, we'll let the final link discard such sections.
// This is compatible with GNU.
if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) {
if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE)
cgProfileSectionIndex = i;
if (sec.sh_type == SHT_LLVM_ADDRSIG) {
// We ignore the address-significance table if we know that the object
// file was created by objcopy or ld -r. This is because these tools
// will reorder the symbols in the symbol table, invalidating the data
// in the address-significance table, which refers to symbols by index.
if (sec.sh_link != 0)
this->addrsigSec = &sec;
else if (config->icf == ICFLevel::Safe)
warn(toString(this) +
": --icf=safe conservatively ignores "
"SHT_LLVM_ADDRSIG [index " +
Twine(i) +
"] with sh_link=0 "
"(likely created using objcopy or ld -r)");
}
this->sections[i] = &InputSection::discarded;
continue;
}
switch (sec.sh_type) {
case SHT_GROUP: {
// De-duplicate section groups by their signatures.
StringRef signature = getShtGroupSignature(objSections, sec);
this->sections[i] = &InputSection::discarded;
ArrayRef<Elf_Word> entries =
CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
if (entries.empty())
fatal(toString(this) + ": empty SHT_GROUP");
Elf_Word flag = entries[0];
if (flag && flag != GRP_COMDAT)
fatal(toString(this) + ": unsupported SHT_GROUP format");
bool keepGroup =
(flag & GRP_COMDAT) == 0 || ignoreComdats ||
symtab->comdatGroups.try_emplace(CachedHashStringRef(signature), this)
.second;
if (keepGroup) {
if (config->relocatable)
this->sections[i] = createInputSection(
i, sec, check(obj.getSectionName(sec, shstrtab)));
selectedGroups.push_back(entries);
continue;
}
// Otherwise, discard group members.
for (uint32_t secIndex : entries.slice(1)) {
if (secIndex >= size)
fatal(toString(this) +
": invalid section index in group: " + Twine(secIndex));
this->sections[secIndex] = &InputSection::discarded;
}
break;
}
case SHT_SYMTAB_SHNDX:
shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this);
break;
case SHT_SYMTAB:
case SHT_STRTAB:
case SHT_REL:
case SHT_RELA:
case SHT_NULL:
break;
case SHT_LLVM_SYMPART:
ctx->hasSympart.store(true, std::memory_order_relaxed);
LLVM_FALLTHROUGH;
default:
this->sections[i] =
createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
}
}
// We have a second loop. It is used to:
// 1) handle SHF_LINK_ORDER sections.
// 2) create SHT_REL[A] sections. In some cases the section header index of a
// relocation section may be smaller than that of the relocated section. In
// such cases, the relocation section would attempt to reference a target
// section that has not yet been created. For simplicity, delay creation of
// relocation sections until now.
for (size_t i = 0; i != size; ++i) {
if (this->sections[i] == &InputSection::discarded)
continue;
const Elf_Shdr &sec = objSections[i];
if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) {
// Find a relocation target section and associate this section with that.
// Target may have been discarded if it is in a different section group
// and the group is discarded, even though it's a violation of the spec.
// We handle that situation gracefully by discarding dangling relocation
// sections.
const uint32_t info = sec.sh_info;
InputSectionBase *s = getRelocTarget(i, sec, info);
if (!s)
continue;
// The ELF spec allows mergeable sections with relocations, but they are rare,
// and in practice it is hard to merge such sections by contents, because
// applying relocations at the end of linking changes section contents. So, we
// simply handle such sections as non-mergeable ones. Degrading like this
// is acceptable because section merging is optional.
if (auto *ms = dyn_cast<MergeInputSection>(s)) {
s = make<InputSection>(ms->file, ms->flags, ms->type, ms->alignment,
ms->data(), ms->name);
sections[info] = s;
}
if (s->relSecIdx != 0)
error(
toString(s) +
": multiple relocation sections to one section are not supported");
s->relSecIdx = i;
// Relocation sections are usually removed from the output, so return
// `nullptr` for the normal case. However, if -r or --emit-relocs is
// specified, we need to copy them to the output. (Some post link analysis
// tools specify --emit-relocs to obtain the information.)
if (config->copyRelocs) {
auto *isec = make<InputSection>(
*this, sec, check(obj.getSectionName(sec, shstrtab)));
// If the relocated section is discarded (due to /DISCARD/ or
// --gc-sections), the relocation section should be discarded as well.
s->dependentSections.push_back(isec);
sections[i] = isec;
}
continue;
}
// A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
// the flag.
if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER))
continue;
InputSectionBase *linkSec = nullptr;
if (sec.sh_link < size)
linkSec = this->sections[sec.sh_link];
if (!linkSec)
fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link));
// A SHF_LINK_ORDER section is discarded if its linked-to section is
// discarded.
InputSection *isec = cast<InputSection>(this->sections[i]);
linkSec->dependentSections.push_back(isec);
if (!isa<InputSection>(linkSec))
error("a section " + isec->name +
" with SHF_LINK_ORDER should not refer a non-regular section: " +
toString(linkSec));
}
for (ArrayRef<Elf_Word> entries : selectedGroups)
handleSectionGroup<ELFT>(this->sections, entries);
}
// For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
// flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
// the input objects have been compiled.
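//
// Illustrative mapping (assumption, not asserted by this code): objects built
// with -mfloat-abi=soft or -mfloat-abi=softfp normally carry
// Tag_ABI_VFP_args = base AAPCS, while -mfloat-abi=hard objects carry
// Tag_ABI_VFP_args = VFP AAPCS; mixing the two is rejected below.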
static void updateARMVFPArgs(const ARMAttributeParser &attributes,
const InputFile *f) {
Optional<unsigned> attr =
attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
if (!attr)
// If an ABI tag isn't present then it is implicitly given the value of 0
// which maps to ARMBuildAttrs::BaseAAPCS. However, many assembler files,
// including some in glibc that don't use FP args (and should have value 3),
// don't have the attribute, so we do not consider an implicit value of 0
// as a clash.
return;
unsigned vfpArgs = *attr;
ARMVFPArgKind arg;
switch (vfpArgs) {
case ARMBuildAttrs::BaseAAPCS:
arg = ARMVFPArgKind::Base;
break;
case ARMBuildAttrs::HardFPAAPCS:
arg = ARMVFPArgKind::VFP;
break;
case ARMBuildAttrs::ToolChainFPPCS:
// Tool chain specific convention that conforms to neither AAPCS variant.
arg = ARMVFPArgKind::ToolChain;
break;
case ARMBuildAttrs::CompatibleFPAAPCS:
// Object compatible with all conventions.
return;
default:
error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs));
return;
}
// Follow ld.bfd and error if there is a mix of calling conventions.
if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default)
error(toString(f) + ": incompatible Tag_ABI_VFP_args");
else
config->armVFPArgs = arg;
}
// The ARM support in lld makes some use of instructions that are not available
// on all ARM architectures. Namely:
// - Use of the BLX instruction for interworking between ARM and Thumb state.
// - Use of the extended Thumb branch encoding in relocations.
// - Use of the MOVT/MOVW instructions in Thumb thunks.
// The ARM Attributes section contains information about the architecture chosen
// at compile time. We follow the convention that if at least one input object
// is compiled with an architecture that supports these features then lld is
// permitted to use them.
static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
Optional<unsigned> attr =
attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
if (!attr)
return;
auto arch = attr.value();
switch (arch) {
case ARMBuildAttrs::Pre_v4:
case ARMBuildAttrs::v4:
case ARMBuildAttrs::v4T:
// Architectures prior to v5 do not support the BLX instruction.
break;
case ARMBuildAttrs::v5T:
case ARMBuildAttrs::v5TE:
case ARMBuildAttrs::v5TEJ:
case ARMBuildAttrs::v6:
case ARMBuildAttrs::v6KZ:
case ARMBuildAttrs::v6K:
config->armHasBlx = true;
// Architectures used in pre-Cortex processors do not support
// the J1 = 1 J2 = 1 Thumb branch range extension, with the exception
// of architecture v6T2 (arm1156t2-s and arm1156t2f-s), which does.
break;
default:
// All other Architectures have BLX and extended branch encoding
config->armHasBlx = true;
config->armJ1J2BranchEncoding = true;
if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
// All Architectures used in Cortex processors with the exception
// of v6-M and v6S-M have the MOVT and MOVW instructions.
config->armHasMovtMovw = true;
break;
}
}
// If a source file is compiled with x86 hardware-assisted call flow control
// enabled, the generated object file contains feature flags indicating that
// fact. This function reads the feature flags and returns them.
//
// Essentially we want to read a single 32-bit value in this function, but this
// function is rather complicated because the value is buried deep inside a
// .note.gnu.property section.
//
// The section consists of one or more NOTE records. Each NOTE record consists
// of zero or more type-length-value fields. We want to find a field of a
// certain type. It seems a bit much just to store a 32-bit value; perhaps
// the ABI is unnecessarily complicated.
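//
// A rough layout sketch of the note, per the GNU property conventions (for
// illustration only):
//
//   Elf_Nhdr { n_namesz = 4, n_descsz, n_type = NT_GNU_PROPERTY_TYPE_0 }
//   "GNU\0"                                // note name
//   { pr_type, pr_datasz, data, padding }  // repeated; we look for the
//                                          // GNU_PROPERTY_*_FEATURE_1_AND type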
template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) {
using Elf_Nhdr = typename ELFT::Nhdr;
using Elf_Note = typename ELFT::Note;
uint32_t featuresSet = 0;
ArrayRef<uint8_t> data = sec.rawData;
auto reportFatal = [&](const uint8_t *place, const char *msg) {
fatal(toString(sec.file) + ":(" + sec.name + "+0x" +
Twine::utohexstr(place - sec.rawData.data()) + "): " + msg);
};
while (!data.empty()) {
// Read one NOTE record.
auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data());
if (data.size() < sizeof(Elf_Nhdr) || data.size() < nhdr->getSize())
reportFatal(data.data(), "data is too short");
Elf_Note note(*nhdr);
if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") {
data = data.slice(nhdr->getSize());
continue;
}
uint32_t featureAndType = config->emachine == EM_AARCH64
? GNU_PROPERTY_AARCH64_FEATURE_1_AND
: GNU_PROPERTY_X86_FEATURE_1_AND;
// Read a body of a NOTE record, which consists of type-length-value fields.
ArrayRef<uint8_t> desc = note.getDesc();
while (!desc.empty()) {
const uint8_t *place = desc.data();
if (desc.size() < 8)
reportFatal(place, "program property is too short");
uint32_t type = read32<ELFT::TargetEndianness>(desc.data());
uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4);
desc = desc.slice(8);
if (desc.size() < size)
reportFatal(place, "program property is too short");
if (type == featureAndType) {
// We found a FEATURE_1_AND field. There may be more than one of these
// in a .note.gnu.property section; for a relocatable object we
// accumulate the bits set.
if (size < 4)
reportFatal(place, "FEATURE_1_AND entry is too short");
featuresSet |= read32<ELFT::TargetEndianness>(desc.data());
}
// Padding is present in the note descriptor, if necessary.
desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size));
}
// Go to next NOTE record to look for more FEATURE_1_AND descriptions.
data = data.slice(nhdr->getSize());
}
return featuresSet;
}
template <class ELFT>
InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx,
const Elf_Shdr &sec,
uint32_t info) {
if (info < this->sections.size()) {
InputSectionBase *target = this->sections[info];
// Strictly speaking, a relocation section must be included in the
// group of the section it relocates. However, LLVM 3.3 and earlier
// would fail to do so, so we gracefully handle that case.
if (target == &InputSection::discarded)
return nullptr;
if (target != nullptr)
return target;
}
error(toString(this) + Twine(": relocation section (index ") + Twine(idx) +
") has invalid sh_info (" + Twine(info) + ")");
return nullptr;
}
template <class ELFT>
InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
const Elf_Shdr &sec,
StringRef name) {
if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) {
ARMAttributeParser attributes;
ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec));
if (Error e = attributes.parse(contents, config->ekind == ELF32LEKind
? support::little
: support::big)) {
auto *isec = make<InputSection>(*this, sec, name);
warn(toString(isec) + ": " + llvm::toString(std::move(e)));
} else {
updateSupportedARMFeatures(attributes);
updateARMVFPArgs(attributes, this);
// FIXME: Retain the first attribute section we see. The eglibc ARM
// dynamic loaders require the presence of an attribute section for dlopen
// to work. In a full implementation we would merge all attribute
// sections.
if (in.attributes == nullptr) {
in.attributes = std::make_unique<InputSection>(*this, sec, name);
return in.attributes.get();
}
return &InputSection::discarded;
}
}
if (sec.sh_type == SHT_RISCV_ATTRIBUTES && config->emachine == EM_RISCV) {
RISCVAttributeParser attributes;
ArrayRef<uint8_t> contents = check(this->getObj().getSectionContents(sec));
if (Error e = attributes.parse(contents, support::little)) {
auto *isec = make<InputSection>(*this, sec, name);
warn(toString(isec) + ": " + llvm::toString(std::move(e)));
} else {
// FIXME: Validate arch tag contains C if and only if EF_RISCV_RVC is
// present.
// FIXME: Retain the first attribute section we see. Tools such as
// llvm-objdump make use of the attribute section to determine which
// standard extensions to enable. In a full implementation we would merge
// all attribute sections.
if (in.attributes == nullptr) {
in.attributes = std::make_unique<InputSection>(*this, sec, name);
return in.attributes.get();
}
return &InputSection::discarded;
}
}
if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) {
ArrayRef<char> data =
CHECK(this->getObj().template getSectionContentsAsArray<char>(sec), this);
if (!data.empty() && data.back() != '\0') {
error(toString(this) +
": corrupted dependent libraries section (unterminated string): " +
name);
return &InputSection::discarded;
}
for (const char *d = data.begin(), *e = data.end(); d < e;) {
StringRef s(d);
addDependentLibrary(s, this);
d += s.size() + 1;
}
return &InputSection::discarded;
}
if (name.startswith(".n")) {
// The GNU linker uses the .note.GNU-stack section as a marker indicating
// that the code in the object file does not expect that the stack is
// executable (in terms of NX bit). If all input files have the marker,
// the GNU linker adds a PT_GNU_STACK segment to tell the loader to
// make the stack non-executable. Most object files have this section as
// of 2017.
//
// But making the stack non-executable is the norm today for security
// reasons. Failure to do so may result in a serious security issue.
// Therefore, we make LLD always add PT_GNU_STACK unless it is
// explicitly told to do otherwise (by -z execstack). Because the stack
// executable-ness is controlled solely by command line options,
// .note.GNU-stack sections are simply ignored.
if (name == ".note.GNU-stack")
return &InputSection::discarded;
// Object files that use processor features such as Intel Control-Flow
// Enforcement (CET) or AArch64 Branch Target Identification (BTI) use a
// .note.gnu.property section containing a bitfield of feature bits like the
// GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
//
// Since we merge bitmaps from multiple object files to create a new
// .note.gnu.property containing a single AND'ed bitmap, we discard an input
// file's .note.gnu.property section.
if (name == ".note.gnu.property") {
this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name));
return &InputSection::discarded;
}
// Split stack is a feature to support a discontiguous stack,
// commonly used in the programming language Go. For the details,
// see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
// for split stack will include a .note.GNU-split-stack section.
if (name == ".note.GNU-split-stack") {
if (config->relocatable) {
error(
"cannot mix split-stack and non-split-stack in a relocatable link");
return &InputSection::discarded;
}
this->splitStack = true;
return &InputSection::discarded;
}
// An object file compiled for split stack, but where some of the
// functions were compiled with the no_split_stack attribute, will
// include a .note.GNU-no-split-stack section.
if (name == ".note.GNU-no-split-stack") {
this->someNoSplitStack = true;
return &InputSection::discarded;
}
// Strip existing .note.gnu.build-id sections so that the output won't have
// more than one build-id. This is not usually a problem because input
// object files normally don't have .build-id sections, but you can create
// such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
// against it.
if (name == ".note.gnu.build-id")
return &InputSection::discarded;
}
// The linker merges EH (exception handling) frames and creates a
// .eh_frame_hdr section for runtime. So we handle them with a special
// class. For relocatable outputs, they are just passed through.
if (name == ".eh_frame" && !config->relocatable)
return make<EhInputSection>(*this, sec, name);
if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
return make<MergeInputSection>(*this, sec, name);
return make<InputSection>(*this, sec, name);
}
// Initialize this->Symbols. this->Symbols is a parallel array to
// its corresponding ELF symbol table.
template <class ELFT>
void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
SymbolTable &symtab = *elf::symtab;
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
symbols.resize(eSyms.size());
// Some entries have been filled by LazyObjFile.
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
if (!symbols[i])
symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
// Perform symbol resolution on non-local symbols.
SmallVector<unsigned, 32> undefineds;
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
uint32_t secIdx = eSym.st_shndx;
if (secIdx == SHN_UNDEF) {
undefineds.push_back(i);
continue;
}
uint8_t binding = eSym.getBinding();
uint8_t stOther = eSym.st_other;
uint8_t type = eSym.getType();
uint64_t value = eSym.st_value;
uint64_t size = eSym.st_size;
Symbol *sym = symbols[i];
sym->isUsedInRegularObj = true;
if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
if (value == 0 || value >= UINT32_MAX)
fatal(toString(this) + ": common symbol '" + sym->getName() +
"' has invalid alignment: " + Twine(value));
hasCommonSyms = true;
sym->resolve(
CommonSymbol{this, StringRef(), binding, stOther, type, value, size});
continue;
}
// Handle global defined symbols. Defined::section will be set in postParse.
sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size,
nullptr});
}
// Undefined symbols (excluding those defined relative to non-prevailing
// sections) can trigger recursive extract. Process defined symbols first so
// that the relative order between a defined symbol and an undefined symbol
// does not change the symbol resolution behavior. In addition, a set of
// interconnected symbols will all be resolved to the same file, instead of
// being resolved to different files.
for (unsigned i : undefineds) {
const Elf_Sym &eSym = eSyms[i];
Symbol *sym = symbols[i];
sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other,
eSym.getType()});
sym->isUsedInRegularObj = true;
sym->referenced = true;
}
}
template <class ELFT> void ObjFile<ELFT>::initializeLocalSymbols() {
if (!firstGlobal)
return;
localSymStorage = std::make_unique<SymbolUnion[]>(firstGlobal);
SymbolUnion *locals = localSymStorage.get();
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
for (size_t i = 0, end = firstGlobal; i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
uint32_t secIdx = eSym.st_shndx;
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (LLVM_UNLIKELY(secIdx >= sections.size()))
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
error(toString(this) + ": non-local symbol (" + Twine(i) +
") found at index < .symtab's sh_info (" + Twine(end) + ")");
InputSectionBase *sec = sections[secIdx];
uint8_t type = eSym.getType();
if (type == STT_FILE)
sourceFile = CHECK(eSym.getName(stringTable), this);
if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
fatal(toString(this) + ": invalid symbol name offset");
StringRef name(stringTable.data() + eSym.st_name);
symbols[i] = reinterpret_cast<Symbol *>(locals + i);
if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
/*discardedSecIdx=*/secIdx);
else
new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
eSym.st_value, eSym.st_size, sec);
symbols[i]->isUsedInRegularObj = true;
}
}
// Called after all ObjFile::parse is called for all ObjFiles. This checks
// duplicate symbols and may do symbol property merge in the future.
template <class ELFT> void ObjFile<ELFT>::postParse() {
static std::mutex mu;
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
const Elf_Sym &eSym = eSyms[i];
Symbol &sym = *symbols[i];
uint32_t secIdx = eSym.st_shndx;
uint8_t binding = eSym.getBinding();
if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
binding != STB_GNU_UNIQUE))
errorOrWarn(toString(this) + ": symbol (" + Twine(i) +
") has invalid binding: " + Twine((int)binding));
// st_value of STT_TLS represents the assigned offset, not the actual
// address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
// only be referenced by special TLS relocations. It is usually an error if
// an STT_TLS symbol is replaced by a non-STT_TLS symbol, or vice versa.
if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
eSym.getType() != STT_NOTYPE)
errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " +
toString(sym.file) + "\n>>> in " + toString(this));
// Handle non-COMMON defined symbols below. !sym.file allows a symbol
// assignment to redefine a symbol without an error.
if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF ||
secIdx == SHN_COMMON)
continue;
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
else if (secIdx >= SHN_LORESERVE)
secIdx = 0;
if (LLVM_UNLIKELY(secIdx >= sections.size()))
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
InputSectionBase *sec = sections[secIdx];
if (sec == &InputSection::discarded) {
if (sym.traced) {
printTraceSymbol(Undefined{this, sym.getName(), sym.binding,
sym.stOther, sym.type, secIdx},
sym.getName());
}
if (sym.file == this) {
std::lock_guard<std::mutex> lock(mu);
ctx->nonPrevailingSyms.emplace_back(&sym, secIdx);
}
continue;
}
if (sym.file == this) {
cast<Defined>(sym).section = sec;
continue;
}
- if (binding == STB_WEAK)
+ if (sym.binding == STB_WEAK || binding == STB_WEAK)
continue;
std::lock_guard<std::mutex> lock(mu);
ctx->duplicates.push_back({&sym, this, sec, eSym.st_value});
}
}
// The handling of tentative definitions (COMMON symbols) in archives is murky.
// A tentative definition will be promoted to a global definition if there are
// no non-tentative definitions to dominate it. When we hold a tentative
// definition to a symbol and are inspecting archive members for inclusion
// there are 2 ways we can proceed:
//
// 1) Consider the tentative definition a 'real' definition (i.e. promotion from
// tentative to real definition has already happened) and not inspect
// archive members for Global/Weak definitions to replace the tentative
// definition. An archive member would only be included if it satisfies some
// other undefined symbol. This is the behavior Gold uses.
//
// 2) Consider the tentative definition as still undefined (i.e. the promotion to
// a real definition happens only after all symbol resolution is done).
// The linker searches archive members for STB_GLOBAL definitions to
// replace the tentative definition with. This is the behavior used by
// GNU ld.
//
// The second behavior is inherited from SysVR4, which based it on the FORTRAN
// COMMON BLOCK model. This behavior is needed for proper initialization in old
// (pre F90) FORTRAN code that is packaged into an archive.
//
// The following functions search archive members for definitions to replace
// tentative definitions (implementing behavior 2).
static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
IRSymtabFile symtabFile = check(readIRSymtab(mb));
for (const irsymtab::Reader::SymbolRef &sym :
symtabFile.TheReader.symbols()) {
if (sym.isGlobal() && sym.getName() == symName)
return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
}
return false;
}
template <class ELFT>
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(mb, archiveName);
StringRef stringtable = obj->getStringTable();
for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
Expected<StringRef> name = sym.getName(stringtable);
if (name && name.get() == symName)
return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
!sym.isCommon();
}
return false;
}
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
StringRef archiveName) {
switch (getELFKind(mb, archiveName)) {
case ELF32LEKind:
return isNonCommonDef<ELF32LE>(mb, symName, archiveName);
case ELF32BEKind:
return isNonCommonDef<ELF32BE>(mb, symName, archiveName);
case ELF64LEKind:
return isNonCommonDef<ELF64LE>(mb, symName, archiveName);
case ELF64BEKind:
return isNonCommonDef<ELF64BE>(mb, symName, archiveName);
default:
llvm_unreachable("getELFKind");
}
}
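// A minimal usage sketch (illustrative, not part of the original diff): under
// behavior 2, when the symbol table holds a tentative (COMMON) definition of
// "foo" and an archive member provides a strong STB_GLOBAL definition, the
// member is extracted so the strong definition replaces the tentative one.
// The names `member` and `extract` below are hypothetical stand-ins for the
// real archive-walking code:
//
//   if (sym->isCommon() && member->shouldExtractForCommon("foo"))
//     member->extract(); // strong definition replaces the COMMON symbol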
unsigned SharedFile::vernauxNum;
// Parse the version definitions in the object file if present, and return a
// vector whose nth element contains a pointer to the Elf_Verdef for version
// identifier n. Version identifiers that are not definitions map to nullptr.
template <typename ELFT>
static SmallVector<const void *, 0>
parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) {
if (!sec)
return {};
// Build the Verdefs array by following the chain of Elf_Verdef objects
// from the start of the .gnu.version_d section.
SmallVector<const void *, 0> verdefs;
const uint8_t *verdef = base + sec->sh_offset;
for (unsigned i = 0, e = sec->sh_info; i != e; ++i) {
auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef);
verdef += curVerdef->vd_next;
unsigned verdefIndex = curVerdef->vd_ndx;
if (verdefIndex >= verdefs.size())
verdefs.resize(verdefIndex + 1);
verdefs[verdefIndex] = curVerdef;
}
return verdefs;
}
// Parse SHT_GNU_verneed to properly set the name of a versioned undefined
// symbol. We detect fatal issues which would cause vulnerabilities, but do not
// implement sophisticated error checking like in llvm-readobj because the value
// of such diagnostics is low.
template <typename ELFT>
std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
const typename ELFT::Shdr *sec) {
if (!sec)
return {};
std::vector<uint32_t> verneeds;
ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this);
const uint8_t *verneedBuf = data.begin();
for (unsigned i = 0; i != sec->sh_info; ++i) {
if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end())
fatal(toString(this) + " has an invalid Verneed");
auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf);
const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
for (unsigned j = 0; j != vn->vn_cnt; ++j) {
if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end())
fatal(toString(this) + " has an invalid Vernaux");
auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf);
if (aux->vna_name >= this->stringTable.size())
fatal(toString(this) + " has a Vernaux with an invalid vna_name");
uint16_t version = aux->vna_other & VERSYM_VERSION;
if (version >= verneeds.size())
verneeds.resize(version + 1);
verneeds[version] = aux->vna_name;
vernauxBuf += aux->vna_next;
}
verneedBuf += vn->vn_next;
}
return verneeds;
}
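// Worked example (illustrative): a Vernaux whose vna_other field is 0x8003
// has VERSYM_HIDDEN (0x8000) set and a version index of 3 (0x8003 &
// VERSYM_VERSION), so verneeds[3] receives that entry's vna_name offset.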
// We do not usually care about alignments of data in shared object
// files because the loader takes care of it. However, if we promote a
// DSO symbol to point to .bss due to copy relocation, we need to keep
// the original alignment requirements. We infer it in this function.
template <typename ELFT>
static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
const typename ELFT::Sym &sym) {
uint64_t ret = UINT64_MAX;
if (sym.st_value)
ret = 1ULL << countTrailingZeros((uint64_t)sym.st_value);
if (0 < sym.st_shndx && sym.st_shndx < sections.size())
ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
return (ret > UINT32_MAX) ? 0 : ret;
}
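// Worked example (illustrative): for st_value == 0x1008,
// countTrailingZeros(0x1008) == 3 yields a provisional alignment of
// 1 << 3 == 8; if the containing section has sh_addralign == 4, the
// result is min(8, 4) == 4.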
// Fully parse the shared object file.
//
// This function parses symbol versions. If a DSO has version information,
// the file has a ".gnu.version_d" section which contains symbol version
// definitions. Each symbol is associated with one version through a table in
// the ".gnu.version" section. That table is a parallel array to the symbol
// table, and each table entry contains an index into ".gnu.version_d".
//
// The special index 0 is reserved for VER_NDX_LOCAL and 1 is for
// VER_NDX_GLOBAL. There's no table entry for these special versions in
// ".gnu.version_d".
//
// The file format for symbol versioning is perhaps a bit more complicated
// than necessary, but you can easily understand the code if you wrap your
// head around the data structure described above.
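// Worked example (illustrative): if the ".gnu.version" entry for a defined
// symbol "foo" holds index 3 and verdefs[3] names version "V2", the symbol
// is also added under the versioned name "foo@V2" so that undefined
// references carrying an explicit version can bind to it.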
template <class ELFT> void SharedFile::parse() {
using Elf_Dyn = typename ELFT::Dyn;
using Elf_Shdr = typename ELFT::Shdr;
using Elf_Sym = typename ELFT::Sym;
using Elf_Verdef = typename ELFT::Verdef;
using Elf_Versym = typename ELFT::Versym;
ArrayRef<Elf_Dyn> dynamicTags;
const ELFFile<ELFT> obj = this->getObj<ELFT>();
ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
const Elf_Shdr *versymSec = nullptr;
const Elf_Shdr *verdefSec = nullptr;
const Elf_Shdr *verneedSec = nullptr;
// Search for .dynamic, .gnu.version, .gnu.version_d and .gnu.version_r.
for (const Elf_Shdr &sec : sections) {
switch (sec.sh_type) {
default:
continue;
case SHT_DYNAMIC:
dynamicTags =
CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
break;
case SHT_GNU_versym:
versymSec = &sec;
break;
case SHT_GNU_verdef:
verdefSec = &sec;
break;
case SHT_GNU_verneed:
verneedSec = &sec;
break;
}
}
if (versymSec && numELFSyms == 0) {
error("SHT_GNU_versym should be associated with symbol table");
return;
}
// Record DT_NEEDED entries and search for a DT_SONAME tag to initialize
// this->soName.
for (const Elf_Dyn &dyn : dynamicTags) {
if (dyn.d_tag == DT_NEEDED) {
uint64_t val = dyn.getVal();
if (val >= this->stringTable.size())
fatal(toString(this) + ": invalid DT_NEEDED entry");
dtNeeded.push_back(this->stringTable.data() + val);
} else if (dyn.d_tag == DT_SONAME) {
uint64_t val = dyn.getVal();
if (val >= this->stringTable.size())
fatal(toString(this) + ": invalid DT_SONAME entry");
soName = this->stringTable.data() + val;
}
}
// DSOs are uniquified not by filename but by soname.
DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
bool wasInserted;
std::tie(it, wasInserted) =
symtab->soNames.try_emplace(CachedHashStringRef(soName), this);
// If a DSO appears more than once on the command line with and without
// --as-needed, --no-as-needed takes precedence over --as-needed because a
// user can add an extra DSO with --no-as-needed to force it to be added to
// the dependency list.
it->second->isNeeded |= isNeeded;
if (!wasInserted)
return;
ctx->sharedFiles.push_back(this);
verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
// Parse ".gnu.version" section which is a parallel array for the symbol
// table. If a given file doesn't have a ".gnu.version" section, we use
// VER_NDX_GLOBAL.
size_t size = numELFSyms - firstGlobal;
std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
if (versymSec) {
ArrayRef<Elf_Versym> versym =
CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
this)
.slice(firstGlobal);
for (size_t i = 0; i < size; ++i)
versyms[i] = versym[i].vs_index;
}
// System libraries can have a lot of symbols with versions. Using a
// fixed buffer for computing the versioned name (foo@ver) can save a
// lot of allocations.
SmallString<0> versionedNameBuffer;
// Add symbols to the symbol table.
SymbolTable &symtab = *elf::symtab;
ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
for (size_t i = 0, e = syms.size(); i != e; ++i) {
const Elf_Sym &sym = syms[i];
// The ELF spec requires that all local symbols precede weak or global
// symbols in each symbol table, and that the index of the first non-local
// symbol be stored in sh_info. If a local symbol appears after some
// non-local symbol, that's a violation of the spec.
StringRef name = CHECK(sym.getName(stringTable), this);
if (sym.getBinding() == STB_LOCAL) {
warn("found local symbol '" + name +
"' in global part of symbol table in file " + toString(this));
continue;
}
uint16_t idx = versyms[i] & ~VERSYM_HIDDEN;
if (sym.isUndefined()) {
// For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
// as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
if (idx != VER_NDX_LOCAL && idx != VER_NDX_GLOBAL) {
if (idx >= verneeds.size()) {
error("corrupt input file: version need index " + Twine(idx) +
" for symbol " + name + " is out of bounds\n>>> defined in " +
toString(this));
continue;
}
StringRef verName = stringTable.data() + verneeds[idx];
versionedNameBuffer.clear();
name = saver().save(
(name + "@" + verName).toStringRef(versionedNameBuffer));
}
Symbol *s = symtab.addSymbol(
Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
s->exportDynamic = true;
if (s->isUndefined() && sym.getBinding() != STB_WEAK &&
config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
requiredSymbols.push_back(s);
continue;
}
// The MIPS BFD linker puts the _gp_disp symbol into DSO files and
// incorrectly assigns VER_NDX_LOCAL to this global symbol. Here is a
// workaround for that bug.
if (config->emachine == EM_MIPS && idx == VER_NDX_LOCAL &&
name == "_gp_disp")
continue;
uint32_t alignment = getAlignment<ELFT>(sections, sym);
if (!(versyms[i] & VERSYM_HIDDEN)) {
auto *s = symtab.addSymbol(
SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
sym.getType(), sym.st_value, sym.st_size, alignment});
if (s->file == this)
s->verdefIndex = idx;
}
// Also add the symbol with the versioned name to handle undefined symbols
// with explicit versions.
if (idx == VER_NDX_GLOBAL)
continue;
if (idx >= verdefs.size() || idx == VER_NDX_LOCAL) {
error("corrupt input file: version definition index " + Twine(idx) +
" for symbol " + name + " is out of bounds\n>>> defined in " +
toString(this));
continue;
}
StringRef verName =
stringTable.data() +
reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name;
versionedNameBuffer.clear();
name = (name + "@" + verName).toStringRef(versionedNameBuffer);
auto *s = symtab.addSymbol(
SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other,
sym.getType(), sym.st_value, sym.st_size, alignment});
if (s->file == this)
s->verdefIndex = idx;
}
}
static ELFKind getBitcodeELFKind(const Triple &t) {
if (t.isLittleEndian())
return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind;
return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind;
}
static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
switch (t.getArch()) {
case Triple::aarch64:
case Triple::aarch64_be:
return EM_AARCH64;
case Triple::amdgcn:
case Triple::r600:
return EM_AMDGPU;
case Triple::arm:
case Triple::thumb:
return EM_ARM;
case Triple::avr:
return EM_AVR;
case Triple::hexagon:
return EM_HEXAGON;
case Triple::mips:
case Triple::mipsel:
case Triple::mips64:
case Triple::mips64el:
return EM_MIPS;
case Triple::msp430:
return EM_MSP430;
case Triple::ppc:
case Triple::ppcle:
return EM_PPC;
case Triple::ppc64:
case Triple::ppc64le:
return EM_PPC64;
case Triple::riscv32:
case Triple::riscv64:
return EM_RISCV;
case Triple::x86:
return t.isOSIAMCU() ? EM_IAMCU : EM_386;
case Triple::x86_64:
return EM_X86_64;
default:
error(path + ": could not infer e_machine from bitcode target triple " +
t.str());
return EM_NONE;
}
}
static uint8_t getOsAbi(const Triple &t) {
switch (t.getOS()) {
case Triple::AMDHSA:
return ELF::ELFOSABI_AMDGPU_HSA;
case Triple::AMDPAL:
return ELF::ELFOSABI_AMDGPU_PAL;
case Triple::Mesa3D:
return ELF::ELFOSABI_AMDGPU_MESA3D;
default:
return ELF::ELFOSABI_NONE;
}
}
BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
uint64_t offsetInArchive, bool lazy)
: InputFile(BitcodeKind, mb) {
this->archiveName = archiveName;
this->lazy = lazy;
std::string path = mb.getBufferIdentifier().str();
if (config->thinLTOIndexOnly)
path = replaceThinLTOSuffix(mb.getBufferIdentifier());
// ThinLTO assumes that all MemoryBufferRefs given to it have a unique
// name. If two archives define two members with the same name, this
// causes a collision which results in only one of the objects being taken
// into consideration at LTO time (which very likely causes undefined
// symbols later in the link stage). So we append the file offset to make
// the filename unique.
StringRef name = archiveName.empty()
? saver().save(path)
: saver().save(archiveName + "(" + path::filename(path) +
" at " + utostr(offsetInArchive) + ")");
MemoryBufferRef mbref(mb.getBuffer(), name);
obj = CHECK(lto::InputFile::create(mbref), this);
Triple t(obj->getTargetTriple());
ekind = getBitcodeELFKind(t);
emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t);
osabi = getOsAbi(t);
}
static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
switch (gvVisibility) {
case GlobalValue::DefaultVisibility:
return STV_DEFAULT;
case GlobalValue::HiddenVisibility:
return STV_HIDDEN;
case GlobalValue::ProtectedVisibility:
return STV_PROTECTED;
}
llvm_unreachable("unknown visibility");
}
template <class ELFT>
static void
createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
const lto::InputFile::Symbol &objSym, BitcodeFile &f) {
uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
uint8_t visibility = mapVisibility(objSym.getVisibility());
if (!sym)
sym = symtab->insert(saver().save(objSym.getName()));
int c = objSym.getComdatIndex();
if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
Undefined newSym(&f, StringRef(), binding, visibility, type);
sym->resolve(newSym);
sym->referenced = true;
return;
}
if (objSym.isCommon()) {
sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT,
objSym.getCommonAlignment(),
objSym.getCommonSize()});
} else {
Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr);
if (objSym.canBeOmittedFromSymbolTable())
newSym.exportDynamic = false;
sym->resolve(newSym);
}
}
template <class ELFT> void BitcodeFile::parse() {
for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
keptComdats.push_back(
s.second == Comdat::NoDeduplicate ||
symtab->comdatGroups.try_emplace(CachedHashStringRef(s.first), this)
.second);
}
symbols.resize(obj->symbols().size());
// Process defined symbols first. See the comment in
// ObjFile<ELFT>::initializeSymbols.
for (auto it : llvm::enumerate(obj->symbols()))
if (!it.value().isUndefined()) {
Symbol *&sym = symbols[it.index()];
createBitcodeSymbol<ELFT>(sym, keptComdats, it.value(), *this);
}
for (auto it : llvm::enumerate(obj->symbols()))
if (it.value().isUndefined()) {
Symbol *&sym = symbols[it.index()];
createBitcodeSymbol<ELFT>(sym, keptComdats, it.value(), *this);
}
for (auto l : obj->getDependentLibraries())
addDependentLibrary(l, this);
}
void BitcodeFile::parseLazy() {
SymbolTable &symtab = *elf::symtab;
symbols.resize(obj->symbols().size());
for (auto it : llvm::enumerate(obj->symbols()))
if (!it.value().isUndefined()) {
auto *sym = symtab.insert(saver().save(it.value().getName()));
sym->resolve(LazyObject{*this});
symbols[it.index()] = sym;
}
}
void BitcodeFile::postParse() {
for (auto it : llvm::enumerate(obj->symbols())) {
const Symbol &sym = *symbols[it.index()];
const auto &objSym = it.value();
if (sym.file == this || !sym.isDefined() || objSym.isUndefined() ||
objSym.isCommon() || objSym.isWeak())
continue;
int c = objSym.getComdatIndex();
if (c != -1 && !keptComdats[c])
continue;
reportDuplicate(sym, this, nullptr, 0);
}
}
void BinaryFile::parse() {
ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
8, data, ".data");
sections.push_back(section);
// For each input file foo that is embedded into the result as a binary
// blob, we define _binary_foo_{start,end,size} symbols, so that
// user programs can access blobs by name. Non-alphanumeric
// characters in a filename are replaced with underscore.
std::string s = "_binary_" + mb.getBufferIdentifier().str();
for (size_t i = 0; i < s.size(); ++i)
if (!isAlnum(s[i]))
s[i] = '_';
llvm::StringSaver &saver = lld::saver();
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0,
0, section});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
data.size(), 0, section});
symtab->addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"),
STB_GLOBAL, STV_DEFAULT, STT_OBJECT,
data.size(), 0, nullptr});
}
ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
bool lazy) {
ELFFileBase *f;
switch (getELFKind(mb, archiveName)) {
case ELF32LEKind:
f = make<ObjFile<ELF32LE>>(mb, archiveName);
break;
case ELF32BEKind:
f = make<ObjFile<ELF32BE>>(mb, archiveName);
break;
case ELF64LEKind:
f = make<ObjFile<ELF64LE>>(mb, archiveName);
break;
case ELF64BEKind:
f = make<ObjFile<ELF64BE>>(mb, archiveName);
break;
default:
llvm_unreachable("getELFKind");
}
f->lazy = lazy;
return f;
}
template <class ELFT> void ObjFile<ELFT>::parseLazy() {
const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
SymbolTable &symtab = *elf::symtab;
symbols.resize(eSyms.size());
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
if (eSyms[i].st_shndx != SHN_UNDEF)
symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
// Replace existing symbols with LazyObject symbols.
//
// resolve() may trigger this->extract() if an existing symbol is an undefined
// symbol. If that happens, this function has served its purpose, and we can
// exit from the loop early.
for (Symbol *sym : makeArrayRef(symbols).slice(firstGlobal))
if (sym) {
sym->resolve(LazyObject{*this});
if (!lazy)
return;
}
}
bool InputFile::shouldExtractForCommon(StringRef name) {
if (isa<BitcodeFile>(this))
return isBitcodeNonCommonDef(mb, name, archiveName);
return isNonCommonDef(mb, name, archiveName);
}
std::string elf::replaceThinLTOSuffix(StringRef path) {
StringRef suffix = config->thinLTOObjectSuffixReplace.first;
StringRef repl = config->thinLTOObjectSuffixReplace.second;
if (path.consume_back(suffix))
return (path + repl).str();
return std::string(path);
}
template void BitcodeFile::parse<ELF32LE>();
template void BitcodeFile::parse<ELF32BE>();
template void BitcodeFile::parse<ELF64LE>();
template void BitcodeFile::parse<ELF64BE>();
template class elf::ObjFile<ELF32LE>;
template class elf::ObjFile<ELF32BE>;
template class elf::ObjFile<ELF64LE>;
template class elf::ObjFile<ELF64BE>;
template void SharedFile::parse<ELF32LE>();
template void SharedFile::parse<ELF32BE>();
template void SharedFile::parse<ELF64LE>();
template void SharedFile::parse<ELF64BE>();
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp
index ca6cbdfbb8bb..8f267251b7c0 100644
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -1,708 +1,733 @@
//===- UnwindInfoSection.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "UnwindInfoSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputSection.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Parallel.h"
#include <numeric>
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;
#define COMMON_ENCODINGS_MAX 127
#define COMPACT_ENCODINGS_MAX 256
#define SECOND_LEVEL_PAGE_BYTES 4096
#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX \
((SECOND_LEVEL_PAGE_BYTES - \
sizeof(unwind_info_regular_second_level_page_header)) / \
sizeof(unwind_info_regular_second_level_entry))
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \
((SECOND_LEVEL_PAGE_BYTES - \
sizeof(unwind_info_compressed_second_level_page_header)) / \
sizeof(uint32_t))
#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK \
UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)
// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup. Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
// that cannot are retained in DWARF form.
//
// This comment will address macro-level organization of the pre-link
// and post-link compact unwind tables. For micro-level organization
// pertaining to the bitfield layout of the 32-bit compact unwind
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
//
// Important clarifying factoids:
//
// * __LD,__compact_unwind is the compact unwind format for compiler
// output and linker input. It is never a final output. It could be
// an intermediate output with the `-r` option which retains relocs.
//
// * __TEXT,__unwind_info is the compact unwind format for final
// linker output. It is never an input.
//
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
//
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
// level) by ascending address, and the pages are referenced by an
// index (1st level) in the section header.
//
// * Following the headers in __TEXT,__unwind_info, the bulk of the
// section contains a vector of compact unwind entries
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
// Adjacent entries with the same encoding can be folded to great
// advantage, achieving a 3-order-of-magnitude reduction in the
// number of entries.
//
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
// encodings for the space-efficient compressed format. In practice,
// fewer than a dozen unique encodings are used by C++ programs of
// all sizes. When a second-level page saturates its local encoding
// table, we fall back to the regular non-compressed page format, so
// overflowing the 127-encodings limit is handled gracefully.
//
// Refer to the definition of unwind_info_section_header in
// compact_unwind_encoding.h for an overview of the format we are encoding
// here.
// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?
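// Illustrative sketch (assumed addresses and encodings, not from the
// original diff): the entry vector described above might look like
//   { {0x0000, encA}, {0x0040, encA}, {0x0090, encB} }
// and the two adjacent encA entries fold into a single {0x0000, encA}
// entry covering both functions.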
// The offsets of various fields in the on-disk representation of each compact
// unwind entry.
struct CompactUnwindOffsets {
uint32_t functionAddress;
uint32_t functionLength;
uint32_t encoding;
uint32_t personality;
uint32_t lsda;
CompactUnwindOffsets(size_t wordSize) {
if (wordSize == 8)
init<uint64_t>();
else {
assert(wordSize == 4);
init<uint32_t>();
}
}
private:
template <class Ptr> void init() {
functionAddress = offsetof(Layout<Ptr>, functionAddress);
functionLength = offsetof(Layout<Ptr>, functionLength);
encoding = offsetof(Layout<Ptr>, encoding);
personality = offsetof(Layout<Ptr>, personality);
lsda = offsetof(Layout<Ptr>, lsda);
}
template <class Ptr> struct Layout {
Ptr functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Ptr personality;
Ptr lsda;
};
};
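// Worked example (illustrative): with wordSize == 8, Layout<uint64_t> yields
// offsets functionAddress = 0, functionLength = 8, encoding = 12,
// personality = 16 and lsda = 24, for a 32-byte on-disk entry; with
// wordSize == 4 the entry shrinks to 20 bytes.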
// LLD's internal representation of a compact unwind entry.
struct CompactUnwindEntry {
uint64_t functionAddress;
uint32_t functionLength;
compact_unwind_encoding_t encoding;
Symbol *personality;
InputSection *lsda;
};
using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;
struct SecondLevelPage {
uint32_t kind;
size_t entryIndex;
size_t entryCount;
size_t byteCount;
std::vector<compact_unwind_encoding_t> localEncodings;
EncodingMap localEncodingIndexes;
};
// UnwindInfoSectionImpl allows us to avoid cluttering our header file with a
// lengthy definition of UnwindInfoSection.
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {}
uint64_t getSize() const override { return unwindInfoSize; }
- void prepareRelocations() override;
+ void prepare() override;
void finalize() override;
void writeTo(uint8_t *buf) const override;
private:
void prepareRelocations(ConcatInputSection *);
void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
void encodePersonalities();
+ Symbol *canonicalizePersonality(Symbol *);
uint64_t unwindInfoSize = 0;
std::vector<decltype(symbols)::value_type> symbolsVec;
CompactUnwindOffsets cuOffsets;
std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
EncodingMap commonEncodingIndexes;
// The entries here will be in the same order as their originating symbols
// in symbolsVec.
std::vector<CompactUnwindEntry> cuEntries;
// Indices into the cuEntries vector.
std::vector<size_t> cuIndices;
std::vector<Symbol *> personalities;
SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
personalityTable;
// Indices into cuEntries for CUEs with a non-null LSDA.
std::vector<size_t> entriesWithLsda;
// Map of cuEntries index to an index within the LSDA array.
DenseMap<size_t, uint32_t> lsdaIndex;
std::vector<SecondLevelPage> secondLevelPages;
uint64_t level2PagesOffset = 0;
};
UnwindInfoSection::UnwindInfoSection()
: SyntheticSection(segment_names::text, section_names::unwindInfo) {
align = 4;
}
// Record function symbols that may need entries emitted in __unwind_info, which
// stores unwind data for address ranges.
//
// Note that if several adjacent functions have the same unwind encoding and
// personality function and no LSDA, they share one unwind entry. For this to
// work, functions without unwind info need explicit "no unwind info" unwind
// entries -- otherwise the unwinder would assume they share the unwind info
// of the closest preceding function that has it. Thus, we add
// function symbols for each unique address regardless of whether they have
// associated unwind info.
void UnwindInfoSection::addSymbol(const Defined *d) {
if (d->unwindEntry)
allEntriesAreOmitted = false;
// We don't yet know the final output address of this symbol, but we know
// that it is uniquely determined by the combination of isec and value, so
// we use that as the key here.
auto p = symbols.insert({{d->isec, d->value}, d});
// If we have multiple symbols at the same address, only one of them can have
// an associated unwind entry.
if (!p.second && d->unwindEntry) {
assert(!p.first->second->unwindEntry);
p.first->second = d;
}
}
-void UnwindInfoSectionImpl::prepareRelocations() {
+void UnwindInfoSectionImpl::prepare() {
// This iteration needs to be deterministic, since prepareRelocations may add
// entries to the GOT. Hence the use of a MapVector for
// UnwindInfoSection::symbols.
for (const Defined *d : make_second_range(symbols))
- if (d->unwindEntry &&
- d->unwindEntry->getName() == section_names::compactUnwind)
- prepareRelocations(d->unwindEntry);
+ if (d->unwindEntry) {
+ if (d->unwindEntry->getName() == section_names::compactUnwind) {
+ prepareRelocations(d->unwindEntry);
+ } else {
+ // We don't have to add entries to the GOT here because FDEs have
+ // explicit GOT relocations, so Writer::scanRelocations() will add those
+ // GOT entries. However, we still need to canonicalize the personality
+ // pointers (like prepareRelocations() does for CU entries) in order
+ // to avoid overflowing the 3-personality limit.
+ FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
+ fde.personality = canonicalizePersonality(fde.personality);
+ }
+ }
}
// Compact unwind relocations have different semantics, so we handle them in a
// separate code path from regular relocations. First, we do not wish to add
// rebase opcodes for __LD,__compact_unwind, because that section doesn't
// actually end up in the final binary. Second, personality pointers always
// reside in the GOT and must be treated specially.
void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
assert(!isec->shouldOmitFromOutput() &&
"__compact_unwind section should not be omitted");
// FIXME: Make this skip relocations for CompactUnwindEntries that
// point to dead-stripped functions. That might save some amount of
// work. But since there are usually just a few personality functions
// that are referenced from many places, and at least some of them are
// likely live, it wouldn't reduce the number of GOT entries.
for (size_t i = 0; i < isec->relocs.size(); ++i) {
Reloc &r = isec->relocs[i];
assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));
// Functions and LSDA entries always reside in the same object file as the
// compact unwind entries that reference them, and thus appear as section
// relocs. There is no need to prepare them. We only prepare relocs for
// personality functions.
if (r.offset != cuOffsets.personality)
continue;
if (auto *s = r.referent.dyn_cast<Symbol *>()) {
// Personality functions are nearly always system-defined (e.g.,
// ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an
// application provides its own personality function, it might be
// referenced by an extern Defined symbol reloc, or a local section reloc.
if (auto *defined = dyn_cast<Defined>(s)) {
// XXX(vyng) This is a special case for handling duplicate personality
// symbols. Note that LD64's behavior is a bit different and it is
// inconsistent with how symbol resolution usually works.
//
// So we've decided not to follow it. Instead, simply pick the symbol
// with the same name from the symbol table to replace the local one.
//
// (See discussions/alternatives already considered on D107533)
if (!defined->isExternal())
if (Symbol *sym = symtab->find(defined->getName()))
if (!sym->isLazy())
r.referent = s = sym;
}
if (auto *undefined = dyn_cast<Undefined>(s)) {
treatUndefinedSymbol(*undefined, isec, r.offset);
// treatUndefinedSymbol() can replace s with a DylibSymbol; re-check.
if (isa<Undefined>(s))
continue;
}
+ // Similar to canonicalizePersonality(), but we also register a GOT entry.
if (auto *defined = dyn_cast<Defined>(s)) {
// Check if we have created a synthetic symbol at the same address.
Symbol *&personality =
personalityTable[{defined->isec, defined->value}];
if (personality == nullptr) {
personality = defined;
in.got->addEntry(defined);
} else if (personality != defined) {
r.referent = personality;
}
continue;
}
+
assert(isa<DylibSymbol>(s));
in.got->addEntry(s);
continue;
}
if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
assert(!isCoalescedWeak(referentIsec));
// Personality functions can be referenced via section relocations
// if they live in the same object file. Create placeholder synthetic
// symbols for them in the GOT.
Symbol *&s = personalityTable[{referentIsec, r.addend}];
if (s == nullptr) {
// This runs after dead stripping, so the noDeadStrip argument does not
// matter.
s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec,
r.addend, /*size=*/0, /*isWeakDef=*/false,
/*isExternal=*/false, /*isPrivateExtern=*/false,
/*includeInSymtab=*/true,
/*isThumb=*/false, /*isReferencedDynamically=*/false,
/*noDeadStrip=*/false);
s->used = true;
in.got->addEntry(s);
}
r.referent = s;
r.addend = 0;
}
}
}
+Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) {
+ if (auto *defined = dyn_cast_or_null<Defined>(personality)) {
+ // Check if we have created a synthetic symbol at the same address.
+ Symbol *&synth = personalityTable[{defined->isec, defined->value}];
+ if (synth == nullptr)
+ synth = defined;
+ else if (synth != defined)
+ return synth;
+ }
+ return personality;
+}
+
// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form. There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
void UnwindInfoSectionImpl::relocateCompactUnwind(
std::vector<CompactUnwindEntry> &cuEntries) {
parallelFor(0, symbolsVec.size(), [&](size_t i) {
CompactUnwindEntry &cu = cuEntries[i];
const Defined *d = symbolsVec[i].second;
cu.functionAddress = d->getVA();
if (!d->unwindEntry)
return;
// If we have DWARF unwind info, create a CU entry that points to it.
if (d->unwindEntry->getName() == section_names::ehFrame) {
cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff;
const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
cu.functionLength = fde.funcLength;
cu.personality = fde.personality;
cu.lsda = fde.lsda;
return;
}
assert(d->unwindEntry->getName() == section_names::compactUnwind);
auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
target->wordSize;
cu.functionLength =
support::endian::read32le(buf + cuOffsets.functionLength);
cu.encoding = support::endian::read32le(buf + cuOffsets.encoding);
for (const Reloc &r : d->unwindEntry->relocs) {
if (r.offset == cuOffsets.personality) {
cu.personality = r.referent.get<Symbol *>();
} else if (r.offset == cuOffsets.lsda) {
if (auto *referentSym = r.referent.dyn_cast<Symbol *>())
cu.lsda = cast<Defined>(referentSym)->isec;
else
cu.lsda = r.referent.get<InputSection *>();
}
}
});
}
// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
void UnwindInfoSectionImpl::encodePersonalities() {
for (size_t idx : cuIndices) {
CompactUnwindEntry &cu = cuEntries[idx];
if (cu.personality == nullptr)
continue;
// Linear search is fast enough for a small array.
auto it = find(personalities, cu.personality);
uint32_t personalityIndex; // 1-based index
if (it != personalities.end()) {
personalityIndex = std::distance(personalities.begin(), it) + 1;
} else {
personalities.push_back(cu.personality);
personalityIndex = personalities.size();
}
cu.encoding |=
personalityIndex << countTrailingZeros(
static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
}
if (personalities.size() > 3)
error("too many personalities (" + Twine(personalities.size()) +
") for compact unwind to encode");
}
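// Worked example (illustrative): UNWIND_PERSONALITY_MASK is 0x30000000, so
// countTrailingZeros(0x30000000) == 28 and a 1-based personalityIndex of 2
// is recorded as (2 << 28) == 0x20000000 OR'd into the entry's encoding,
// which is why only 3 personalities fit.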
static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
// From compact_unwind_encoding.h:
// UNWIND_X86_64_MODE_STACK_IND:
// A "frameless" (RBP not used as frame pointer) function large constant
// stack size. This case is like the previous, except the stack size is too
// large to encode in the compact unwind encoding. Instead it requires that
// the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact
// encoding contains the offset to the nnnnnnnn value in the function in
// UNWIND_X86_64_FRAMELESS_STACK_SIZE.
// Since this means the unwinder has to look at the `subq` in the function
// of the unwind info's unwind address, two functions that have identical
// unwind info can't be folded if it's using this encoding since both
// entries need unique addresses.
static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_MASK) ==
static_cast<uint32_t>(UNWIND_X86_MODE_MASK),
"");
static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_STACK_IND) ==
static_cast<uint32_t>(UNWIND_X86_MODE_STACK_IND),
"");
if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) &&
(encoding & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) {
// FIXME: Consider passing in the two function addresses and getting
// their two stack sizes off the `subq` and only returning false if they're
// actually different.
return false;
}
return true;
}
// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame.
void UnwindInfoSectionImpl::finalize() {
if (symbols.empty())
return;
// At this point, the address space for __TEXT,__text has been
// assigned, so we can relocate the __LD,__compact_unwind entries
// into a temporary buffer. Relocation is necessary in order to sort
// the CU entries by function address. Sorting is necessary so that
// we can fold adjacent CU entries with identical encoding+personality
// and without any LSDA. Folding is necessary because it reduces the
// number of CU entries by as much as 3 orders of magnitude!
cuEntries.resize(symbols.size());
// The "map" part of the symbols MapVector was only needed for deduplication
// in addSymbol(). Now that we are done adding, move the contents to a plain
// std::vector for indexed access.
symbolsVec = symbols.takeVector();
relocateCompactUnwind(cuEntries);
// Rather than sort & fold the 32-byte entries directly, we create a
// vector of indices to entries and sort & fold that instead.
cuIndices.resize(cuEntries.size());
std::iota(cuIndices.begin(), cuIndices.end(), 0);
llvm::sort(cuIndices, [&](size_t a, size_t b) {
return cuEntries[a].functionAddress < cuEntries[b].functionAddress;
});
// Fold adjacent entries with matching encoding+personality and without LSDA
// We use three iterators on the same cuIndices to fold in-situ:
// (1) `foldBegin` is the first of a potential sequence of matching entries
// (2) `foldEnd` is the first non-matching entry after `foldBegin`.
// The semi-open interval [ foldBegin .. foldEnd ) contains a range of
// entries that can be folded into a single entry and written to ...
// (3) `foldWrite`
auto foldWrite = cuIndices.begin();
for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) {
auto foldEnd = foldBegin;
// Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from
// a base address. The base address is normally not contained directly in
// the LSDA, and in that case, the personality function treats the starting
// address of the function (which is computed by the unwinder) as the base
// address and interprets the LSDA accordingly. The unwinder computes the
// starting address of a function as the address associated with its CU
// entry. For this reason, we cannot fold adjacent entries if they have an
// LSDA, because folding would make the unwinder compute the wrong starting
// address for the functions with the folded entries, which in turn would
// cause the personality function to misinterpret the LSDA for those
// functions. In the very rare case where the base address is encoded
// directly in the LSDA, two functions at different addresses would
// necessarily have different LSDAs, so their CU entries would not have been
// folded anyway.
while (++foldEnd < cuIndices.end() &&
cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
!cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda &&
// If we've gotten to this point, we don't have an LSDA, which should
// also imply that we don't have a personality function, since in all
// likelihood a personality function needs the LSDA to do anything
// useful. It can be technically valid to have a personality function
// and no LSDA though (e.g. the C++ personality __gxx_personality_v0
// is just a no-op without LSDA), so we still check for personality
// function equivalence to handle that case.
cuEntries[*foldBegin].personality ==
cuEntries[*foldEnd].personality &&
canFoldEncoding(cuEntries[*foldEnd].encoding))
;
*foldWrite++ = *foldBegin;
foldBegin = foldEnd;
}
cuIndices.erase(foldWrite, cuIndices.end());
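// Worked example (illustrative): given sorted entries
//   [f1, encA] [f2, encA] [f3, encB] [f4, encA]
// with no LSDAs or personalities, the fold loop keeps f1, f3 and f4: f2
// folds into f1's entry, but f4 cannot fold because f3 intervenes with a
// different encoding.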
encodePersonalities();
// Count frequencies of the folded encodings
EncodingMap encodingFrequencies;
for (size_t idx : cuIndices)
encodingFrequencies[cuEntries[idx].encoding]++;
// Make a vector of encodings, sorted by descending frequency
for (const auto &frequency : encodingFrequencies)
commonEncodings.emplace_back(frequency);
llvm::sort(commonEncodings,
[](const std::pair<compact_unwind_encoding_t, size_t> &a,
const std::pair<compact_unwind_encoding_t, size_t> &b) {
if (a.second == b.second)
// When frequencies match, secondarily sort on encoding
// to maintain parity with validate-unwind-info.py
return a.first > b.first;
return a.second > b.second;
});
// Truncate the vector to 127 elements.
// Common encoding indexes are limited to 0..126, while encoding
// indexes 127..255 are local to each second-level page.
if (commonEncodings.size() > COMMON_ENCODINGS_MAX)
commonEncodings.resize(COMMON_ENCODINGS_MAX);
// Create a map from encoding to common-encoding-table index
for (size_t i = 0; i < commonEncodings.size(); i++)
commonEncodingIndexes[commonEncodings[i].first] = i;
// Split folded encodings into pages, where each page is limited by ...
// (a) 4 KiB capacity
// (b) 24-bit difference between first & final function address
// (c) 8-bit compact-encoding-table index,
// for which 0..126 references the global common-encodings table,
// and 127..255 references a local per-second-level-page table.
// First we try the compact format and determine how many entries fit.
// If more entries fit in the regular format, we use that.
for (size_t i = 0; i < cuIndices.size();) {
size_t idx = cuIndices[i];
secondLevelPages.emplace_back();
SecondLevelPage &page = secondLevelPages.back();
page.entryIndex = i;
uint64_t functionAddressMax =
cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
size_t n = commonEncodings.size();
size_t wordsRemaining =
SECOND_LEVEL_PAGE_WORDS -
sizeof(unwind_info_compressed_second_level_page_header) /
sizeof(uint32_t);
while (wordsRemaining >= 1 && i < cuIndices.size()) {
idx = cuIndices[i];
const CompactUnwindEntry *cuPtr = &cuEntries[idx];
if (cuPtr->functionAddress >= functionAddressMax) {
break;
} else if (commonEncodingIndexes.count(cuPtr->encoding) ||
page.localEncodingIndexes.count(cuPtr->encoding)) {
i++;
wordsRemaining--;
} else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) {
page.localEncodings.emplace_back(cuPtr->encoding);
page.localEncodingIndexes[cuPtr->encoding] = n++;
i++;
wordsRemaining -= 2;
} else {
break;
}
}
page.entryCount = i - page.entryIndex;
// If this is not the final page, see if it's possible to fit more
// entries by using the regular format. This can happen when there
// are many unique encodings, and we saturated the local
// encoding table early.
if (i < cuIndices.size() &&
page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
page.kind = UNWIND_SECOND_LEVEL_REGULAR;
page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
cuIndices.size() - page.entryIndex);
i = page.entryIndex + page.entryCount;
} else {
page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
}
}
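// Worked example (illustrative): COMPRESSED_ENTRY_FUNC_OFFSET_BITS is 24,
// so a compressed page only covers functions whose offsets from the page's
// first function fit below COMPRESSED_ENTRY_FUNC_OFFSET_MASK (2^24 - 1
// bytes, roughly 16 MiB); an entry past that bound forces a new
// second-level page.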
for (size_t idx : cuIndices) {
lsdaIndex[idx] = entriesWithLsda.size();
if (cuEntries[idx].lsda)
entriesWithLsda.push_back(idx);
}
// compute size of __TEXT,__unwind_info section
level2PagesOffset = sizeof(unwind_info_section_header) +
commonEncodings.size() * sizeof(uint32_t) +
personalities.size() * sizeof(uint32_t) +
// The extra second-level-page entry is for the sentinel
(secondLevelPages.size() + 1) *
sizeof(unwind_info_section_header_index_entry) +
entriesWithLsda.size() *
sizeof(unwind_info_section_header_lsda_index_entry);
unwindInfoSize =
level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
}
// All inputs are relocated and output addresses are known, so write!
void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
assert(!cuIndices.empty() && "call only if there is unwind info");
// section header
auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
uip->version = 1;
uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
uip->commonEncodingsArrayCount = commonEncodings.size();
uip->personalityArraySectionOffset =
uip->commonEncodingsArraySectionOffset +
(uip->commonEncodingsArrayCount * sizeof(uint32_t));
uip->personalityArrayCount = personalities.size();
uip->indexSectionOffset = uip->personalityArraySectionOffset +
(uip->personalityArrayCount * sizeof(uint32_t));
uip->indexCount = secondLevelPages.size() + 1;
// Common encodings
auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
for (const auto &encoding : commonEncodings)
*i32p++ = encoding.first;
// Personalities
for (const Symbol *personality : personalities)
*i32p++ = personality->getGotVA() - in.header->addr;
// Level-1 index
uint32_t lsdaOffset =
uip->indexSectionOffset +
uip->indexCount * sizeof(unwind_info_section_header_index_entry);
uint64_t l2PagesOffset = level2PagesOffset;
auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
for (const SecondLevelPage &page : secondLevelPages) {
size_t idx = cuIndices[page.entryIndex];
iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr;
iep->secondLevelPagesSectionOffset = l2PagesOffset;
iep->lsdaIndexArraySectionOffset =
lsdaOffset + lsdaIndex.lookup(idx) *
sizeof(unwind_info_section_header_lsda_index_entry);
iep++;
l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
}
// Level-1 sentinel
const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()];
iep->functionOffset =
cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
iep->secondLevelPagesSectionOffset = 0;
iep->lsdaIndexArraySectionOffset =
lsdaOffset + entriesWithLsda.size() *
sizeof(unwind_info_section_header_lsda_index_entry);
iep++;
// LSDAs
auto *lep =
reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
for (size_t idx : entriesWithLsda) {
const CompactUnwindEntry &cu = cuEntries[idx];
lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr;
lep->functionOffset = cu.functionAddress - in.header->addr;
lep++;
}
// Level-2 pages
auto *pp = reinterpret_cast<uint32_t *>(lep);
for (const SecondLevelPage &page : secondLevelPages) {
if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
uintptr_t functionAddressBase =
cuEntries[cuIndices[page.entryIndex]].functionAddress;
auto *p2p =
reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
pp);
p2p->kind = page.kind;
p2p->entryPageOffset =
sizeof(unwind_info_compressed_second_level_page_header);
p2p->entryCount = page.entryCount;
p2p->encodingsPageOffset =
p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
p2p->encodingsCount = page.localEncodings.size();
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]];
auto it = commonEncodingIndexes.find(cue.encoding);
if (it == commonEncodingIndexes.end())
it = page.localEncodingIndexes.find(cue.encoding);
*ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
(cue.functionAddress - functionAddressBase);
}
if (!page.localEncodings.empty())
memcpy(ep, page.localEncodings.data(),
page.localEncodings.size() * sizeof(uint32_t));
} else {
auto *p2p =
reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp);
p2p->kind = page.kind;
p2p->entryPageOffset =
sizeof(unwind_info_regular_second_level_page_header);
p2p->entryCount = page.entryCount;
auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
for (size_t i = 0; i < page.entryCount; i++) {
const CompactUnwindEntry &cue =
cuEntries[cuIndices[page.entryIndex + i]];
*ep++ = cue.functionAddress;
*ep++ = cue.encoding;
}
}
pp += SECOND_LEVEL_PAGE_WORDS;
}
}
UnwindInfoSection *macho::makeUnwindInfoSection() {
return make<UnwindInfoSectionImpl>();
}
diff --git a/lld/MachO/UnwindInfoSection.h b/lld/MachO/UnwindInfoSection.h
index c6b334731c75..f2bc3213a127 100644
--- a/lld/MachO/UnwindInfoSection.h
+++ b/lld/MachO/UnwindInfoSection.h
@@ -1,43 +1,43 @@
//===- UnwindInfoSection.h ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_UNWIND_INFO_H
#define LLD_MACHO_UNWIND_INFO_H
#include "ConcatOutputSection.h"
#include "SyntheticSections.h"
#include "llvm/ADT/MapVector.h"
#include "mach-o/compact_unwind_encoding.h"
namespace lld {
namespace macho {
class UnwindInfoSection : public SyntheticSection {
public:
// If all functions are free of unwind info, we can omit the unwind info
// section entirely.
bool isNeeded() const override { return !allEntriesAreOmitted; }
void addSymbol(const Defined *);
- virtual void prepareRelocations() = 0;
+ virtual void prepare() = 0;
protected:
UnwindInfoSection();
llvm::MapVector<std::pair<const InputSection *, uint64_t /*Defined::value*/>,
const Defined *>
symbols;
bool allEntriesAreOmitted = true;
};
UnwindInfoSection *makeUnwindInfoSection();
} // namespace macho
} // namespace lld
#endif
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 3c44a60f4be2..ce9672dd0b4f 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -1,1245 +1,1245 @@
//===- Writer.cpp ---------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "Writer.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
#include "MapFile.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SectionPriorities.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "UnwindInfoSection.h"
#include "llvm/Support/Parallel.h"
#include "lld/Common/Arrays.h"
#include "lld/Common/CommonLinkerContext.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::sys;
using namespace lld;
using namespace lld::macho;
namespace {
class LCUuid;
class Writer {
public:
Writer() : buffer(errorHandler().outputBuffer) {}
void treatSpecialUndefineds();
void scanRelocations();
void scanSymbols();
template <class LP> void createOutputSections();
template <class LP> void createLoadCommands();
void finalizeAddresses();
void finalizeLinkEditSegment();
void assignAddresses(OutputSegment *);
void openFile();
void writeSections();
void writeUuid();
void writeCodeSignature();
void writeOutputFile();
template <class LP> void run();
ThreadPool threadPool;
std::unique_ptr<FileOutputBuffer> &buffer;
uint64_t addr = 0;
uint64_t fileOff = 0;
MachHeaderSection *header = nullptr;
StringTableSection *stringTableSection = nullptr;
SymtabSection *symtabSection = nullptr;
IndirectSymtabSection *indirectSymtabSection = nullptr;
CodeSignatureSection *codeSignatureSection = nullptr;
DataInCodeSection *dataInCodeSection = nullptr;
FunctionStartsSection *functionStartsSection = nullptr;
LCUuid *uuidCommand = nullptr;
OutputSegment *linkEditSegment = nullptr;
};
// LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
class LCDyldInfo final : public LoadCommand {
public:
LCDyldInfo(RebaseSection *rebaseSection, BindingSection *bindingSection,
WeakBindingSection *weakBindingSection,
LazyBindingSection *lazyBindingSection,
ExportSection *exportSection)
: rebaseSection(rebaseSection), bindingSection(bindingSection),
weakBindingSection(weakBindingSection),
lazyBindingSection(lazyBindingSection), exportSection(exportSection) {}
uint32_t getSize() const override { return sizeof(dyld_info_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dyld_info_command *>(buf);
c->cmd = LC_DYLD_INFO_ONLY;
c->cmdsize = getSize();
if (rebaseSection->isNeeded()) {
c->rebase_off = rebaseSection->fileOff;
c->rebase_size = rebaseSection->getFileSize();
}
if (bindingSection->isNeeded()) {
c->bind_off = bindingSection->fileOff;
c->bind_size = bindingSection->getFileSize();
}
if (weakBindingSection->isNeeded()) {
c->weak_bind_off = weakBindingSection->fileOff;
c->weak_bind_size = weakBindingSection->getFileSize();
}
if (lazyBindingSection->isNeeded()) {
c->lazy_bind_off = lazyBindingSection->fileOff;
c->lazy_bind_size = lazyBindingSection->getFileSize();
}
if (exportSection->isNeeded()) {
c->export_off = exportSection->fileOff;
c->export_size = exportSection->getFileSize();
}
}
RebaseSection *rebaseSection;
BindingSection *bindingSection;
WeakBindingSection *weakBindingSection;
LazyBindingSection *lazyBindingSection;
ExportSection *exportSection;
};
class LCSubFramework final : public LoadCommand {
public:
LCSubFramework(StringRef umbrella) : umbrella(umbrella) {}
uint32_t getSize() const override {
return alignTo(sizeof(sub_framework_command) + umbrella.size() + 1,
target->wordSize);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<sub_framework_command *>(buf);
buf += sizeof(sub_framework_command);
c->cmd = LC_SUB_FRAMEWORK;
c->cmdsize = getSize();
c->umbrella = sizeof(sub_framework_command);
memcpy(buf, umbrella.data(), umbrella.size());
buf[umbrella.size()] = '\0';
}
private:
const StringRef umbrella;
};
class LCFunctionStarts final : public LoadCommand {
public:
explicit LCFunctionStarts(FunctionStartsSection *functionStartsSection)
: functionStartsSection(functionStartsSection) {}
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_FUNCTION_STARTS;
c->cmdsize = getSize();
c->dataoff = functionStartsSection->fileOff;
c->datasize = functionStartsSection->getFileSize();
}
private:
FunctionStartsSection *functionStartsSection;
};
class LCDataInCode final : public LoadCommand {
public:
explicit LCDataInCode(DataInCodeSection *dataInCodeSection)
: dataInCodeSection(dataInCodeSection) {}
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_DATA_IN_CODE;
c->cmdsize = getSize();
c->dataoff = dataInCodeSection->fileOff;
c->datasize = dataInCodeSection->getFileSize();
}
private:
DataInCodeSection *dataInCodeSection;
};
class LCDysymtab final : public LoadCommand {
public:
LCDysymtab(SymtabSection *symtabSection,
IndirectSymtabSection *indirectSymtabSection)
: symtabSection(symtabSection),
indirectSymtabSection(indirectSymtabSection) {}
uint32_t getSize() const override { return sizeof(dysymtab_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dysymtab_command *>(buf);
c->cmd = LC_DYSYMTAB;
c->cmdsize = getSize();
c->ilocalsym = 0;
c->iextdefsym = c->nlocalsym = symtabSection->getNumLocalSymbols();
c->nextdefsym = symtabSection->getNumExternalSymbols();
c->iundefsym = c->iextdefsym + c->nextdefsym;
c->nundefsym = symtabSection->getNumUndefinedSymbols();
c->indirectsymoff = indirectSymtabSection->fileOff;
c->nindirectsyms = indirectSymtabSection->getNumSymbols();
}
SymtabSection *symtabSection;
IndirectSymtabSection *indirectSymtabSection;
};
template <class LP> class LCSegment final : public LoadCommand {
public:
LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}
uint32_t getSize() const override {
return sizeof(typename LP::segment_command) +
seg->numNonHiddenSections() * sizeof(typename LP::section);
}
void writeTo(uint8_t *buf) const override {
using SegmentCommand = typename LP::segment_command;
using SectionHeader = typename LP::section;
auto *c = reinterpret_cast<SegmentCommand *>(buf);
buf += sizeof(SegmentCommand);
c->cmd = LP::segmentLCType;
c->cmdsize = getSize();
memcpy(c->segname, name.data(), name.size());
c->fileoff = seg->fileOff;
c->maxprot = seg->maxProt;
c->initprot = seg->initProt;
c->vmaddr = seg->addr;
c->vmsize = seg->vmSize;
c->filesize = seg->fileSize;
c->nsects = seg->numNonHiddenSections();
for (const OutputSection *osec : seg->getSections()) {
if (osec->isHidden())
continue;
auto *sectHdr = reinterpret_cast<SectionHeader *>(buf);
buf += sizeof(SectionHeader);
memcpy(sectHdr->sectname, osec->name.data(), osec->name.size());
memcpy(sectHdr->segname, name.data(), name.size());
sectHdr->addr = osec->addr;
sectHdr->offset = osec->fileOff;
sectHdr->align = Log2_32(osec->align);
sectHdr->flags = osec->flags;
sectHdr->size = osec->getSize();
sectHdr->reserved1 = osec->reserved1;
sectHdr->reserved2 = osec->reserved2;
}
}
private:
StringRef name;
OutputSegment *seg;
};
class LCMain final : public LoadCommand {
uint32_t getSize() const override {
return sizeof(structs::entry_point_command);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<structs::entry_point_command *>(buf);
c->cmd = LC_MAIN;
c->cmdsize = getSize();
if (config->entry->isInStubs())
c->entryoff =
in.stubs->fileOff + config->entry->stubsIndex * target->stubSize;
else
c->entryoff = config->entry->getVA() - in.header->addr;
c->stacksize = 0;
}
};
class LCSymtab final : public LoadCommand {
public:
LCSymtab(SymtabSection *symtabSection, StringTableSection *stringTableSection)
: symtabSection(symtabSection), stringTableSection(stringTableSection) {}
uint32_t getSize() const override { return sizeof(symtab_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<symtab_command *>(buf);
c->cmd = LC_SYMTAB;
c->cmdsize = getSize();
c->symoff = symtabSection->fileOff;
c->nsyms = symtabSection->getNumSymbols();
c->stroff = stringTableSection->fileOff;
c->strsize = stringTableSection->getFileSize();
}
SymtabSection *symtabSection = nullptr;
StringTableSection *stringTableSection = nullptr;
};
// There are several dylib load commands that share the same structure:
// * LC_LOAD_DYLIB
// * LC_ID_DYLIB
// * LC_REEXPORT_DYLIB
class LCDylib final : public LoadCommand {
public:
LCDylib(LoadCommandType type, StringRef path,
uint32_t compatibilityVersion = 0, uint32_t currentVersion = 0)
: type(type), path(path), compatibilityVersion(compatibilityVersion),
currentVersion(currentVersion) {
instanceCount++;
}
uint32_t getSize() const override {
return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dylib_command *>(buf);
buf += sizeof(dylib_command);
c->cmd = type;
c->cmdsize = getSize();
c->dylib.name = sizeof(dylib_command);
c->dylib.timestamp = 0;
c->dylib.compatibility_version = compatibilityVersion;
c->dylib.current_version = currentVersion;
memcpy(buf, path.data(), path.size());
buf[path.size()] = '\0';
}
static uint32_t getInstanceCount() { return instanceCount; }
static void resetInstanceCount() { instanceCount = 0; }
private:
LoadCommandType type;
StringRef path;
uint32_t compatibilityVersion;
uint32_t currentVersion;
static uint32_t instanceCount;
};
uint32_t LCDylib::instanceCount = 0;
class LCLoadDylinker final : public LoadCommand {
public:
uint32_t getSize() const override {
return alignTo(sizeof(dylinker_command) + path.size() + 1, 8);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<dylinker_command *>(buf);
buf += sizeof(dylinker_command);
c->cmd = LC_LOAD_DYLINKER;
c->cmdsize = getSize();
c->name = sizeof(dylinker_command);
memcpy(buf, path.data(), path.size());
buf[path.size()] = '\0';
}
private:
// Recent versions of Darwin won't run any binary that has dyld at a
// different location.
const StringRef path = "/usr/lib/dyld";
};
class LCRPath final : public LoadCommand {
public:
explicit LCRPath(StringRef path) : path(path) {}
uint32_t getSize() const override {
return alignTo(sizeof(rpath_command) + path.size() + 1, target->wordSize);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<rpath_command *>(buf);
buf += sizeof(rpath_command);
c->cmd = LC_RPATH;
c->cmdsize = getSize();
c->path = sizeof(rpath_command);
memcpy(buf, path.data(), path.size());
buf[path.size()] = '\0';
}
private:
StringRef path;
};
class LCMinVersion final : public LoadCommand {
public:
explicit LCMinVersion(const PlatformInfo &platformInfo)
: platformInfo(platformInfo) {}
uint32_t getSize() const override { return sizeof(version_min_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<version_min_command *>(buf);
switch (platformInfo.target.Platform) {
case PLATFORM_MACOS:
c->cmd = LC_VERSION_MIN_MACOSX;
break;
case PLATFORM_IOS:
case PLATFORM_IOSSIMULATOR:
c->cmd = LC_VERSION_MIN_IPHONEOS;
break;
case PLATFORM_TVOS:
case PLATFORM_TVOSSIMULATOR:
c->cmd = LC_VERSION_MIN_TVOS;
break;
case PLATFORM_WATCHOS:
case PLATFORM_WATCHOSSIMULATOR:
c->cmd = LC_VERSION_MIN_WATCHOS;
break;
default:
llvm_unreachable("invalid platform");
break;
}
c->cmdsize = getSize();
c->version = encodeVersion(platformInfo.minimum);
c->sdk = encodeVersion(platformInfo.sdk);
}
private:
const PlatformInfo &platformInfo;
};
class LCBuildVersion final : public LoadCommand {
public:
explicit LCBuildVersion(const PlatformInfo &platformInfo)
: platformInfo(platformInfo) {}
const int ntools = 1;
uint32_t getSize() const override {
return sizeof(build_version_command) + ntools * sizeof(build_tool_version);
}
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<build_version_command *>(buf);
c->cmd = LC_BUILD_VERSION;
c->cmdsize = getSize();
c->platform = static_cast<uint32_t>(platformInfo.target.Platform);
c->minos = encodeVersion(platformInfo.minimum);
c->sdk = encodeVersion(platformInfo.sdk);
c->ntools = ntools;
auto *t = reinterpret_cast<build_tool_version *>(&c[1]);
t->tool = TOOL_LD;
t->version = encodeVersion(VersionTuple(
LLVM_VERSION_MAJOR, LLVM_VERSION_MINOR, LLVM_VERSION_PATCH));
}
private:
const PlatformInfo &platformInfo;
};
// Stores a unique identifier for the output file based on an MD5 hash of its
// contents. In order to hash the contents, we must first write them, but
// LC_UUID itself must be part of the written contents in order for all the
// offsets to be calculated correctly. We resolve this circular paradox by
// first writing an LC_UUID with an all-zero UUID, then updating the UUID with
// its real value later.
class LCUuid final : public LoadCommand {
public:
uint32_t getSize() const override { return sizeof(uuid_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<uuid_command *>(buf);
c->cmd = LC_UUID;
c->cmdsize = getSize();
uuidBuf = c->uuid;
}
void writeUuid(uint64_t digest) const {
// xxhash only gives us 8 bytes, so put some fixed data in the other half.
static_assert(sizeof(uuid_command::uuid) == 16, "unexpected uuid size");
memcpy(uuidBuf, "LLD\xa1UU1D", 8);
memcpy(uuidBuf + 8, &digest, 8);
// RFC 4122 conformance. We need to fix 4 bits in byte 6 and 2 bits in
// byte 8. Byte 6 is already fine due to the fixed data we put in. We don't
// want to lose bits of the digest in byte 8, so swap that with a byte of
// fixed data that happens to have the right bits set.
std::swap(uuidBuf[3], uuidBuf[8]);
// Claim that this is an MD5-based hash. It isn't, but this signals that
// this is not a time-based and not a random hash. MD5 seems like the least
// bad lie we can put here.
assert((uuidBuf[6] & 0xf0) == 0x30 && "See RFC 4122 Sections 4.2.2, 4.1.3");
assert((uuidBuf[8] & 0xc0) == 0x80 && "See RFC 4122 Section 4.2.2");
}
mutable uint8_t *uuidBuf;
};
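// Illustrative call order for the two-phase write described above (a sketch,
// not part of this change; hashContents() is a hypothetical stand-in):
//   uuidCommand->writeTo(buf);        // emits LC_UUID with an all-zero uuid
//                                     // and remembers its location in uuidBuf
//   uint64_t digest = hashContents(); // hash the now-complete file contents
//   uuidCommand->writeUuid(digest);   // patch the real value in place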
template <class LP> class LCEncryptionInfo final : public LoadCommand {
public:
uint32_t getSize() const override {
return sizeof(typename LP::encryption_info_command);
}
void writeTo(uint8_t *buf) const override {
using EncryptionInfo = typename LP::encryption_info_command;
auto *c = reinterpret_cast<EncryptionInfo *>(buf);
buf += sizeof(EncryptionInfo);
c->cmd = LP::encryptionInfoLCType;
c->cmdsize = getSize();
c->cryptoff = in.header->getSize();
auto it = find_if(outputSegments, [](const OutputSegment *seg) {
return seg->name == segment_names::text;
});
assert(it != outputSegments.end());
c->cryptsize = (*it)->fileSize - c->cryptoff;
}
};
class LCCodeSignature final : public LoadCommand {
public:
LCCodeSignature(CodeSignatureSection *section) : section(section) {}
uint32_t getSize() const override { return sizeof(linkedit_data_command); }
void writeTo(uint8_t *buf) const override {
auto *c = reinterpret_cast<linkedit_data_command *>(buf);
c->cmd = LC_CODE_SIGNATURE;
c->cmdsize = getSize();
c->dataoff = static_cast<uint32_t>(section->fileOff);
c->datasize = section->getSize();
}
CodeSignatureSection *section;
};
} // namespace
void Writer::treatSpecialUndefineds() {
if (config->entry)
if (auto *undefined = dyn_cast<Undefined>(config->entry))
treatUndefinedSymbol(*undefined, "the entry point");
// FIXME: This prints symbols that are undefined both in input files and
// via -u flag twice.
for (const Symbol *sym : config->explicitUndefineds) {
if (const auto *undefined = dyn_cast<Undefined>(sym))
treatUndefinedSymbol(*undefined, "-u");
}
// Literal exported-symbol names must be defined, but glob
// patterns need not match.
for (const CachedHashStringRef &cachedName :
config->exportedSymbols.literals) {
if (const Symbol *sym = symtab->find(cachedName))
if (const auto *undefined = dyn_cast<Undefined>(sym))
treatUndefinedSymbol(*undefined, "-exported_symbol(s_list)");
}
}
// Add stubs and bindings where necessary (e.g. if the symbol is a
// DylibSymbol.)
static void prepareBranchTarget(Symbol *sym) {
if (auto *dysym = dyn_cast<DylibSymbol>(sym)) {
if (in.stubs->addEntry(dysym)) {
if (sym->isWeakDef()) {
in.binding->addEntry(dysym, in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
} else {
in.lazyBinding->addEntry(dysym);
}
}
} else if (auto *defined = dyn_cast<Defined>(sym)) {
if (defined->isExternalWeakDef()) {
if (in.stubs->addEntry(sym)) {
in.rebase->addEntry(in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
in.weakBinding->addEntry(sym, in.lazyPointers->isec,
sym->stubsIndex * target->wordSize);
}
} else if (defined->interposable) {
if (in.stubs->addEntry(sym))
in.lazyBinding->addEntry(sym);
}
} else {
llvm_unreachable("invalid branch target symbol type");
}
}
// Can a symbol's address only be resolved at runtime?

static bool needsBinding(const Symbol *sym) {
if (isa<DylibSymbol>(sym))
return true;
if (const auto *defined = dyn_cast<Defined>(sym))
return defined->isExternalWeakDef() || defined->interposable;
return false;
}
static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
const lld::macho::Reloc &r) {
assert(sym->isLive());
const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) {
prepareBranchTarget(sym);
} else if (relocAttrs.hasAttr(RelocAttrBits::GOT)) {
if (relocAttrs.hasAttr(RelocAttrBits::POINTER) || needsBinding(sym))
in.got->addEntry(sym);
} else if (relocAttrs.hasAttr(RelocAttrBits::TLV)) {
if (needsBinding(sym))
in.tlvPointers->addEntry(sym);
} else if (relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) {
// References from thread-local variable sections are treated as offsets
// relative to the start of the referent section, and therefore have no
// need of rebase opcodes.
if (!(isThreadLocalVariables(isec->getFlags()) && isa<Defined>(sym)))
addNonLazyBindingEntries(sym, isec, r.offset, r.addend);
}
}
void Writer::scanRelocations() {
TimeTraceScope timeScope("Scan relocations");
// This can't use a for-each loop: It calls treatUndefinedSymbol(), which can
// add to inputSections, which invalidates inputSections's iterators.
for (size_t i = 0; i < inputSections.size(); ++i) {
ConcatInputSection *isec = inputSections[i];
if (isec->shouldOmitFromOutput())
continue;
for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
lld::macho::Reloc &r = *it;
if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
// Skip over the following UNSIGNED relocation -- it's just there as the
// minuend, and doesn't have the usual UNSIGNED semantics. We don't want
// to emit rebase opcodes for it.
it++;
continue;
}
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
if (auto *undefined = dyn_cast<Undefined>(sym))
treatUndefinedSymbol(*undefined, isec, r.offset);
// treatUndefinedSymbol() can replace sym with a DylibSymbol; re-check.
if (!isa<Undefined>(sym) && validateSymbolRelocation(sym, isec, r))
prepareSymbolRelocation(sym, isec, r);
} else {
// Canonicalize the referent so that later accesses in Writer won't
// have to worry about it. Perhaps we should do this for Defined::isec
// too...
auto *referentIsec = r.referent.get<InputSection *>();
r.referent = referentIsec->canonical();
if (!r.pcrel)
in.rebase->addEntry(isec, r.offset);
}
}
}
- in.unwindInfo->prepareRelocations();
+ in.unwindInfo->prepare();
}
void Writer::scanSymbols() {
TimeTraceScope timeScope("Scan symbols");
for (Symbol *sym : symtab->getSymbols()) {
if (auto *defined = dyn_cast<Defined>(sym)) {
if (!defined->isLive())
continue;
defined->canonicalize();
if (defined->overridesWeakDef)
in.weakBinding->addNonWeakDefinition(defined);
if (!defined->isAbsolute() && isCodeSection(defined->isec))
in.unwindInfo->addSymbol(defined);
} else if (const auto *dysym = dyn_cast<DylibSymbol>(sym)) {
// This branch intentionally doesn't check isLive().
if (dysym->isDynamicLookup())
continue;
dysym->getFile()->refState =
std::max(dysym->getFile()->refState, dysym->getRefState());
}
}
for (const InputFile *file : inputFiles) {
if (auto *objFile = dyn_cast<ObjFile>(file))
for (Symbol *sym : objFile->symbols) {
if (auto *defined = dyn_cast_or_null<Defined>(sym)) {
if (!defined->isLive())
continue;
defined->canonicalize();
if (!defined->isExternal() && !defined->isAbsolute() &&
isCodeSection(defined->isec))
in.unwindInfo->addSymbol(defined);
}
}
}
}
// TODO: ld64 enforces the old load commands in a few other cases.
static bool useLCBuildVersion(const PlatformInfo &platformInfo) {
static const std::vector<std::pair<PlatformType, VersionTuple>> minVersion = {
{PLATFORM_MACOS, VersionTuple(10, 14)},
{PLATFORM_IOS, VersionTuple(12, 0)},
{PLATFORM_IOSSIMULATOR, VersionTuple(13, 0)},
{PLATFORM_TVOS, VersionTuple(12, 0)},
{PLATFORM_TVOSSIMULATOR, VersionTuple(13, 0)},
{PLATFORM_WATCHOS, VersionTuple(5, 0)},
{PLATFORM_WATCHOSSIMULATOR, VersionTuple(6, 0)}};
auto it = llvm::find_if(minVersion, [&](const auto &p) {
return p.first == platformInfo.target.Platform;
});
return it == minVersion.end() ? true : platformInfo.minimum >= it->second;
}
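// Illustrative outcomes (assumed deployment targets, not from this change): a
// macOS 10.13 target is below the 10.14 threshold in the table above, so this
// returns false and an LC_VERSION_MIN_MACOSX command is emitted; a macOS 11.0
// target returns true and gets LC_BUILD_VERSION. Platforms absent from the
// table (it == minVersion.end()) default to LC_BUILD_VERSION.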
template <class LP> void Writer::createLoadCommands() {
uint8_t segIndex = 0;
for (OutputSegment *seg : outputSegments) {
in.header->addLoadCommand(make<LCSegment<LP>>(seg->name, seg));
seg->index = segIndex++;
}
in.header->addLoadCommand(make<LCDyldInfo>(
in.rebase, in.binding, in.weakBinding, in.lazyBinding, in.exports));
in.header->addLoadCommand(make<LCSymtab>(symtabSection, stringTableSection));
in.header->addLoadCommand(
make<LCDysymtab>(symtabSection, indirectSymtabSection));
if (!config->umbrella.empty())
in.header->addLoadCommand(make<LCSubFramework>(config->umbrella));
if (config->emitEncryptionInfo)
in.header->addLoadCommand(make<LCEncryptionInfo<LP>>());
for (StringRef path : config->runtimePaths)
in.header->addLoadCommand(make<LCRPath>(path));
switch (config->outputType) {
case MH_EXECUTE:
in.header->addLoadCommand(make<LCLoadDylinker>());
break;
case MH_DYLIB:
in.header->addLoadCommand(make<LCDylib>(LC_ID_DYLIB, config->installName,
config->dylibCompatibilityVersion,
config->dylibCurrentVersion));
break;
case MH_BUNDLE:
break;
default:
llvm_unreachable("unhandled output file type");
}
uuidCommand = make<LCUuid>();
in.header->addLoadCommand(uuidCommand);
if (useLCBuildVersion(config->platformInfo))
in.header->addLoadCommand(make<LCBuildVersion>(config->platformInfo));
else
in.header->addLoadCommand(make<LCMinVersion>(config->platformInfo));
if (config->secondaryPlatformInfo) {
in.header->addLoadCommand(
make<LCBuildVersion>(*config->secondaryPlatformInfo));
}
// This is down here to match ld64's load command order.
if (config->outputType == MH_EXECUTE)
in.header->addLoadCommand(make<LCMain>());
// See OutputFile::buildDylibOrdinalMapping for the corresponding library
// ordinal computation code in ld64.
int64_t dylibOrdinal = 1;
DenseMap<StringRef, int64_t> ordinalForInstallName;
std::vector<DylibFile *> dylibFiles;
for (InputFile *file : inputFiles) {
if (auto *dylibFile = dyn_cast<DylibFile>(file))
dylibFiles.push_back(dylibFile);
}
for (size_t i = 0; i < dylibFiles.size(); ++i)
dylibFiles.insert(dylibFiles.end(), dylibFiles[i]->extraDylibs.begin(),
dylibFiles[i]->extraDylibs.end());
for (DylibFile *dylibFile : dylibFiles) {
if (dylibFile->isBundleLoader) {
dylibFile->ordinal = BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE;
// Shortcut since bundle-loader does not re-export the symbols.
dylibFile->reexport = false;
continue;
}
// Don't emit load commands for a dylib that is not referenced if:
// - it was added implicitly (via a reexport, an LC_LOAD_DYLINKER --
// if it's on the linker command line, it's explicit)
// - or it's marked MH_DEAD_STRIPPABLE_DYLIB
// - or the flag -dead_strip_dylibs is used
// FIXME: `isReferenced()` is currently computed before dead code
// stripping, so references from dead code keep a dylib alive. This
// matches ld64, but it's something we should do better.
if (!dylibFile->isReferenced() && !dylibFile->forceNeeded &&
(!dylibFile->isExplicitlyLinked() || dylibFile->deadStrippable ||
config->deadStripDylibs))
continue;
// Several DylibFiles can have the same installName. Only emit a single
// load command for that installName and give all these DylibFiles the
// same ordinal.
// This can happen in several cases:
// - a new framework could change its installName to an older
// framework name via an $ld$ symbol depending on platform_version
// - symlinks (for example, libpthread.tbd is a symlink to libSystem.tbd;
// Foo.framework/Foo.tbd is usually a symlink to
// Foo.framework/Versions/Current/Foo.tbd, where
// Foo.framework/Versions/Current is usually a symlink to
// Foo.framework/Versions/A)
// - a framework can be linked both explicitly on the linker
// command line and implicitly as a reexport from a different
// framework. The re-export will usually point to the tbd file
// in Foo.framework/Versions/A/Foo.tbd, while the explicit link will
// usually find Foo.framework/Foo.tbd. These are usually symlinks,
// but in a --reproduce archive they will be identical but distinct
// files.
// In the first case, *semantically distinct* DylibFiles will have the
// same installName.
int64_t &ordinal = ordinalForInstallName[dylibFile->installName];
if (ordinal) {
dylibFile->ordinal = ordinal;
continue;
}
ordinal = dylibFile->ordinal = dylibOrdinal++;
LoadCommandType lcType =
dylibFile->forceWeakImport || dylibFile->refState == RefState::Weak
? LC_LOAD_WEAK_DYLIB
: LC_LOAD_DYLIB;
in.header->addLoadCommand(make<LCDylib>(lcType, dylibFile->installName,
dylibFile->compatibilityVersion,
dylibFile->currentVersion));
if (dylibFile->reexport)
in.header->addLoadCommand(
make<LCDylib>(LC_REEXPORT_DYLIB, dylibFile->installName));
}
if (functionStartsSection)
in.header->addLoadCommand(make<LCFunctionStarts>(functionStartsSection));
if (dataInCodeSection)
in.header->addLoadCommand(make<LCDataInCode>(dataInCodeSection));
if (codeSignatureSection)
in.header->addLoadCommand(make<LCCodeSignature>(codeSignatureSection));
const uint32_t MACOS_MAXPATHLEN = 1024;
config->headerPad = std::max(
config->headerPad, (config->headerPadMaxInstallNames
? LCDylib::getInstanceCount() * MACOS_MAXPATHLEN
: 0));
}
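// Worked example for the headerPad computation above (hypothetical numbers):
// with -headerpad_max_install_names and 3 LCDylib commands emitted, headerPad
// becomes max(headerPad, 3 * 1024) = 3072 bytes, reserving room for tools like
// install_name_tool to later grow each dylib path up to MACOS_MAXPATHLEN.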
// Sorting can only happen once all outputs have been collected. Here we sort
// segments, output sections within each segment, and input sections within
// each output section.
static void sortSegmentsAndSections() {
TimeTraceScope timeScope("Sort segments and sections");
sortOutputSegments();
DenseMap<const InputSection *, size_t> isecPriorities =
priorityBuilder.buildInputSectionPriorities();
uint32_t sectionIndex = 0;
for (OutputSegment *seg : outputSegments) {
seg->sortOutputSections();
// References from thread-local variable sections are treated as offsets
// relative to the start of the thread-local data memory area, which
// is initialized via copying all the TLV data sections (which are all
// contiguous). If later data sections require a greater alignment than
// earlier ones, the offsets of data within those sections won't be
// guaranteed to be aligned unless we normalize alignments. We therefore use
// the largest alignment for all TLV data sections.
uint32_t tlvAlign = 0;
for (const OutputSection *osec : seg->getSections())
if (isThreadLocalData(osec->flags) && osec->align > tlvAlign)
tlvAlign = osec->align;
for (OutputSection *osec : seg->getSections()) {
// Now that the output sections are sorted, assign the final
// output section indices.
if (!osec->isHidden())
osec->index = ++sectionIndex;
if (isThreadLocalData(osec->flags)) {
if (!firstTLVDataSection)
firstTLVDataSection = osec;
osec->align = tlvAlign;
}
if (!isecPriorities.empty()) {
if (auto *merged = dyn_cast<ConcatOutputSection>(osec)) {
llvm::stable_sort(merged->inputs,
[&](InputSection *a, InputSection *b) {
return isecPriorities[a] > isecPriorities[b];
});
}
}
}
}
}
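// Illustrative TLV case (assumed alignments): if __thread_data has align 8 and
// a later __thread_bss has align 16, both are normalized to 16 above so that
// offsets computed relative to the start of the contiguous TLV area remain
// correctly aligned.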
template <class LP> void Writer::createOutputSections() {
TimeTraceScope timeScope("Create output sections");
// First, create hidden sections
stringTableSection = make<StringTableSection>();
symtabSection = makeSymtabSection<LP>(*stringTableSection);
indirectSymtabSection = make<IndirectSymtabSection>();
if (config->adhocCodesign)
codeSignatureSection = make<CodeSignatureSection>();
if (config->emitDataInCodeInfo)
dataInCodeSection = make<DataInCodeSection>();
if (config->emitFunctionStarts)
functionStartsSection = make<FunctionStartsSection>();
if (config->emitBitcodeBundle)
make<BitcodeBundleSection>();
switch (config->outputType) {
case MH_EXECUTE:
make<PageZeroSection>();
break;
case MH_DYLIB:
case MH_BUNDLE:
break;
default:
llvm_unreachable("unhandled output file type");
}
// Then add input sections to output sections.
for (ConcatInputSection *isec : inputSections) {
if (isec->shouldOmitFromOutput())
continue;
ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
osec->addInput(isec);
osec->inputOrder =
std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
}
// Once all the inputs are added, we can finalize the output section
// properties and create the corresponding output segments.
for (const auto &it : concatOutputSections) {
StringRef segname = it.first.first;
ConcatOutputSection *osec = it.second;
assert(segname != segment_names::ld);
if (osec->isNeeded()) {
// See comment in ObjFile::splitEhFrames()
if (osec->name == section_names::ehFrame &&
segname == segment_names::text)
osec->align = target->wordSize;
getOrCreateOutputSegment(segname)->addOutputSection(osec);
}
}
for (SyntheticSection *ssec : syntheticSections) {
auto it = concatOutputSections.find({ssec->segname, ssec->name});
// We add all LinkEdit sections here because we don't know if they are
// needed until their finalizeContents() methods get called later. While
// this means that we add some redundant sections to __LINKEDIT, there is
// no redundancy in the output, as we do not emit section headers for
// any LinkEdit sections.
if (ssec->isNeeded() || ssec->segname == segment_names::linkEdit) {
if (it == concatOutputSections.end()) {
getOrCreateOutputSegment(ssec->segname)->addOutputSection(ssec);
} else {
fatal("section from " +
toString(it->second->firstSection()->getFile()) +
" conflicts with synthetic section " + ssec->segname + "," +
ssec->name);
}
}
}
// dyld requires __LINKEDIT segment to always exist (even if empty).
linkEditSegment = getOrCreateOutputSegment(segment_names::linkEdit);
}
void Writer::finalizeAddresses() {
TimeTraceScope timeScope("Finalize addresses");
uint64_t pageSize = target->getPageSize();
// We could parallelize this loop, but local benchmarking indicates it is
// faster to do it all in the main thread.
for (OutputSegment *seg : outputSegments) {
if (seg == linkEditSegment)
continue;
for (OutputSection *osec : seg->getSections()) {
if (!osec->isNeeded())
continue;
// Other kinds of OutputSections have already been finalized.
if (auto concatOsec = dyn_cast<ConcatOutputSection>(osec))
concatOsec->finalizeContents();
}
}
// Ensure that segments (and the sections they contain) are allocated
// addresses in ascending order, which dyld requires.
//
// Note that at this point, __LINKEDIT sections are empty, but we need to
// determine addresses of other segments/sections before generating its
// contents.
for (OutputSegment *seg : outputSegments) {
if (seg == linkEditSegment)
continue;
seg->addr = addr;
assignAddresses(seg);
// codesign / libstuff checks for segment ordering by verifying that
// `fileOff + fileSize == next segment fileOff`. So we call alignTo() before
// (instead of after) computing fileSize to ensure that the segments are
// contiguous. We handle addr / vmSize similarly for the same reason.
fileOff = alignTo(fileOff, pageSize);
addr = alignTo(addr, pageSize);
seg->vmSize = addr - seg->addr;
seg->fileSize = fileOff - seg->fileOff;
seg->assignAddressesToStartEndSymbols();
}
}
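// Illustrative arithmetic for the alignment above (assumed values): with a
// 16 KiB page size, a segment whose sections end at fileOff 0x5c30 is padded
// so that the next segment starts at alignTo(0x5c30, 0x4000) = 0x8000,
// preserving the fileOff + fileSize == next fileOff invariant that codesign
// checks.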
void Writer::finalizeLinkEditSegment() {
TimeTraceScope timeScope("Finalize __LINKEDIT segment");
// Fill __LINKEDIT contents.
std::vector<LinkEditSection *> linkEditSections{
in.rebase,
in.binding,
in.weakBinding,
in.lazyBinding,
in.exports,
symtabSection,
indirectSymtabSection,
dataInCodeSection,
functionStartsSection,
};
SmallVector<std::shared_future<void>> threadFutures;
threadFutures.reserve(linkEditSections.size());
for (LinkEditSection *osec : linkEditSections)
if (osec)
threadFutures.emplace_back(threadPool.async(
[](LinkEditSection *osec) { osec->finalizeContents(); }, osec));
for (std::shared_future<void> &future : threadFutures)
future.wait();
// Now that __LINKEDIT is filled out, do a proper calculation of its
// addresses and offsets.
linkEditSegment->addr = addr;
assignAddresses(linkEditSegment);
// No need to page-align fileOff / addr here since this is the last segment.
linkEditSegment->vmSize = addr - linkEditSegment->addr;
linkEditSegment->fileSize = fileOff - linkEditSegment->fileOff;
}
void Writer::assignAddresses(OutputSegment *seg) {
seg->fileOff = fileOff;
for (OutputSection *osec : seg->getSections()) {
if (!osec->isNeeded())
continue;
addr = alignTo(addr, osec->align);
fileOff = alignTo(fileOff, osec->align);
osec->addr = addr;
osec->fileOff = isZeroFill(osec->flags) ? 0 : fileOff;
osec->finalize();
osec->assignAddressesToStartEndSymbols();
addr += osec->getSize();
fileOff += osec->getFileSize();
}
}
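// Illustrative walk-through (assumed values): a section with align = 16
// reached at addr 0x100003f0a is placed at alignTo(0x100003f0a, 16) =
// 0x100003f10; a zero-fill section (e.g. __bss) still advances addr by its
// size but gets fileOff = 0 and contributes nothing to the file offset.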
void Writer::openFile() {
Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
FileOutputBuffer::create(config->outputFile, fileOff,
FileOutputBuffer::F_executable);
if (!bufferOrErr)
fatal("failed to open " + config->outputFile + ": " +
llvm::toString(bufferOrErr.takeError()));
buffer = std::move(*bufferOrErr);
in.bufferStart = buffer->getBufferStart();
}
void Writer::writeSections() {
uint8_t *buf = buffer->getBufferStart();
std::vector<const OutputSection *> osecs;
for (const OutputSegment *seg : outputSegments)
append_range(osecs, seg->getSections());
parallelForEach(osecs.begin(), osecs.end(), [&](const OutputSection *osec) {
osec->writeTo(buf + osec->fileOff);
});
}
// In order to utilize multiple cores, we first split the buffer into chunks,
// compute a hash for each chunk, and then compute a hash of those hash
// values.
void Writer::writeUuid() {
TimeTraceScope timeScope("Computing UUID");
ArrayRef<uint8_t> data{buffer->getBufferStart(), buffer->getBufferEnd()};
unsigned chunkCount = parallel::strategy.compute_thread_count() * 10;
// Round-up integer division
size_t chunkSize = (data.size() + chunkCount - 1) / chunkCount;
std::vector<ArrayRef<uint8_t>> chunks = split(data, chunkSize);
// Leave one slot for filename
std::vector<uint64_t> hashes(chunks.size() + 1);
SmallVector<std::shared_future<void>> threadFutures;
threadFutures.reserve(chunks.size());
for (size_t i = 0; i < chunks.size(); ++i)
threadFutures.emplace_back(threadPool.async(
[&](size_t j) { hashes[j] = xxHash64(chunks[j]); }, i));
for (std::shared_future<void> &future : threadFutures)
future.wait();
// Append the output filename so that identical binaries with different names
// don't get the same UUID.
hashes[chunks.size()] = xxHash64(sys::path::filename(config->finalOutput));
uint64_t digest = xxHash64({reinterpret_cast<uint8_t *>(hashes.data()),
hashes.size() * sizeof(uint64_t)});
uuidCommand->writeUuid(digest);
}
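// Illustrative sizing (assumed values): with 8 worker threads, chunkCount is
// 80, so a 100 MiB output is split into ~1.25 MiB chunks hashed in parallel;
// the 80 chunk hashes plus the filename hash are then hashed once more to
// produce the final 64-bit digest.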
void Writer::writeCodeSignature() {
if (codeSignatureSection) {
TimeTraceScope timeScope("Write code signature");
codeSignatureSection->writeHashes(buffer->getBufferStart());
}
}
void Writer::writeOutputFile() {
TimeTraceScope timeScope("Write output file");
openFile();
reportPendingUndefinedSymbols();
if (errorCount())
return;
writeSections();
writeUuid();
writeCodeSignature();
if (auto e = buffer->commit())
error("failed to write to the output file: " + toString(std::move(e)));
}
template <class LP> void Writer::run() {
treatSpecialUndefineds();
if (config->entry && !isa<Undefined>(config->entry))
prepareBranchTarget(config->entry);
// Canonicalization of all pointers to InputSections should be handled by
// these two scan* methods. I.e. from this point onward, for all live
// InputSections, we should have `isec->canonical() == isec`.
scanSymbols();
scanRelocations();
// Do not proceed if there was an undefined symbol.
reportPendingUndefinedSymbols();
if (errorCount())
return;
if (in.stubHelper->isNeeded())
in.stubHelper->setup();
if (in.objCImageInfo->isNeeded())
in.objCImageInfo->finalizeContents();
// At this point, we should know exactly which output sections are needed,
// courtesy of scanSymbols() and scanRelocations().
createOutputSections<LP>();
// After this point, we create no new segments; HOWEVER, we might
// yet create branch-range extension thunks for architectures whose
// hardware call instructions have limited range, e.g., ARM(64).
// The thunks are created as InputSections interspersed among
// the ordinary __TEXT,__text InputSections.
sortSegmentsAndSections();
createLoadCommands<LP>();
finalizeAddresses();
threadPool.async([&] {
if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
timeTraceProfilerInitialize(config->timeTraceGranularity, "writeMapFile");
writeMapFile();
if (LLVM_ENABLE_THREADS && config->timeTraceEnabled)
timeTraceProfilerFinishThread();
});
finalizeLinkEditSegment();
writeOutputFile();
}
template <class LP> void macho::writeResult() { Writer().run<LP>(); }
void macho::resetWriter() { LCDylib::resetInstanceCount(); }
void macho::createSyntheticSections() {
in.header = make<MachHeaderSection>();
if (config->dedupLiterals)
in.cStringSection = make<DeduplicatedCStringSection>();
else
in.cStringSection = make<CStringSection>();
in.wordLiteralSection =
config->dedupLiterals ? make<WordLiteralSection>() : nullptr;
in.rebase = make<RebaseSection>();
in.binding = make<BindingSection>();
in.weakBinding = make<WeakBindingSection>();
in.lazyBinding = make<LazyBindingSection>();
in.exports = make<ExportSection>();
in.got = make<GotSection>();
in.tlvPointers = make<TlvPointerSection>();
in.lazyPointers = make<LazyPointerSection>();
in.stubs = make<StubsSection>();
in.stubHelper = make<StubHelperSection>();
in.unwindInfo = makeUnwindInfoSection();
in.objCImageInfo = make<ObjCImageInfoSection>();
// This section contains space for just a single word, and will be used by
// dyld to cache an address to the image loader it uses.
uint8_t *arr = bAlloc().Allocate<uint8_t>(target->wordSize);
memset(arr, 0, target->wordSize);
in.imageLoaderCache = makeSyntheticInputSection(
segment_names::data, section_names::data, S_REGULAR,
ArrayRef<uint8_t>{arr, target->wordSize},
/*align=*/target->wordSize);
// References from dyld are not visible to us, so ensure this section is
// always treated as live.
in.imageLoaderCache->live = true;
}
OutputSection *macho::firstTLVDataSection = nullptr;
template void macho::writeResult<LP64>();
template void macho::writeResult<ILP32>();
diff --git a/lldb/bindings/interfaces.swig b/lldb/bindings/interfaces.swig
index c9a6d0f06056..021c7683d170 100644
--- a/lldb/bindings/interfaces.swig
+++ b/lldb/bindings/interfaces.swig
@@ -1,84 +1,81 @@
/* Various liblldb typedefs that SWIG needs to know about. */
#define __extension__ /* Undefine GCC keyword to make Swig happy when processing glibc's stdint.h. */
-/* The ISO C99 standard specifies that in C++ implementations limit macros such
- as INT32_MAX should only be defined if __STDC_LIMIT_MACROS is. */
-#define __STDC_LIMIT_MACROS
%include "stdint.i"
%include "lldb/lldb-defines.h"
%include "lldb/lldb-enumerations.h"
%include "lldb/lldb-forward.h"
%include "lldb/lldb-types.h"
/* Forward declaration of SB classes. */
%include "lldb/API/SBDefines.h"
/* Python interface files with docstrings. */
%include "./interface/SBAddress.i"
%include "./interface/SBAttachInfo.i"
%include "./interface/SBBlock.i"
%include "./interface/SBBreakpoint.i"
%include "./interface/SBBreakpointLocation.i"
%include "./interface/SBBreakpointName.i"
%include "./interface/SBBroadcaster.i"
%include "./interface/SBCommandInterpreter.i"
%include "./interface/SBCommandInterpreterRunOptions.i"
%include "./interface/SBCommandReturnObject.i"
%include "./interface/SBCommunication.i"
%include "./interface/SBCompileUnit.i"
%include "./interface/SBData.i"
%include "./interface/SBDebugger.i"
%include "./interface/SBDeclaration.i"
%include "./interface/SBError.i"
%include "./interface/SBEnvironment.i"
%include "./interface/SBEvent.i"
%include "./interface/SBExecutionContext.i"
%include "./interface/SBExpressionOptions.i"
%include "./interface/SBFile.i"
%include "./interface/SBFileSpec.i"
%include "./interface/SBFileSpecList.i"
%include "./interface/SBFrame.i"
%include "./interface/SBFunction.i"
%include "./interface/SBHostOS.i"
%include "./interface/SBInstruction.i"
%include "./interface/SBInstructionList.i"
%include "./interface/SBLanguageRuntime.i"
%include "./interface/SBLaunchInfo.i"
%include "./interface/SBLineEntry.i"
%include "./interface/SBListener.i"
%include "./interface/SBMemoryRegionInfo.i"
%include "./interface/SBMemoryRegionInfoList.i"
%include "./interface/SBModule.i"
%include "./interface/SBModuleSpec.i"
%include "./interface/SBPlatform.i"
%include "./interface/SBProcess.i"
%include "./interface/SBProcessInfo.i"
%include "./interface/SBQueue.i"
%include "./interface/SBQueueItem.i"
%include "./interface/SBReproducer.i"
%include "./interface/SBSection.i"
%include "./interface/SBSourceManager.i"
%include "./interface/SBStream.i"
%include "./interface/SBStringList.i"
%include "./interface/SBStructuredData.i"
%include "./interface/SBSymbol.i"
%include "./interface/SBSymbolContext.i"
%include "./interface/SBSymbolContextList.i"
%include "./interface/SBTarget.i"
%include "./interface/SBThread.i"
%include "./interface/SBThreadCollection.i"
%include "./interface/SBThreadPlan.i"
%include "./interface/SBTrace.i"
%include "./interface/SBType.i"
%include "./interface/SBTypeCategory.i"
%include "./interface/SBTypeEnumMember.i"
%include "./interface/SBTypeFilter.i"
%include "./interface/SBTypeFormat.i"
%include "./interface/SBTypeNameSpecifier.i"
%include "./interface/SBTypeSummary.i"
%include "./interface/SBTypeSynthetic.i"
%include "./interface/SBUnixSignals.i"
%include "./interface/SBValue.i"
%include "./interface/SBValueList.i"
%include "./interface/SBVariablesOptions.i"
%include "./interface/SBWatchpoint.i"
diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig
index bf3de66b91bf..d45431c771ca 100644
--- a/lldb/bindings/python/python-typemaps.swig
+++ b/lldb/bindings/python/python-typemaps.swig
@@ -1,517 +1,517 @@
/* Typemap definitions, to allow SWIG to properly handle 'char**' data types. */
%inline %{
#include "../bindings/python/python-typemaps.h"
%}
%typemap(in) char ** {
/* Check if it is a list */
if (PythonList::Check($input)) {
PythonList list(PyRefType::Borrowed, $input);
int size = list.GetSize();
int i = 0;
$1 = (char **)malloc((size + 1) * sizeof(char *));
for (i = 0; i < size; i++) {
PythonString py_str = list.GetItemAtIndex(i).AsType<PythonString>();
if (!py_str.IsAllocated()) {
PyErr_SetString(PyExc_TypeError, "list must contain strings");
free($1);
return nullptr;
}
$1[i] = const_cast<char *>(py_str.GetString().data());
}
$1[i] = 0;
} else if ($input == Py_None) {
$1 = NULL;
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
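// Illustrative usage from Python (a hypothetical wrapped API, values assumed):
// any parameter matched by this typemap accepts a list of strings or None:
//   info = lldb.SBLaunchInfo(["a.out", "--flag"])  # marshalled to a char**
//   info = lldb.SBLaunchInfo(None)                 # passed through as NULL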
%typemap(typecheck) char ** {
/* Check if it is a list */
$1 = 1;
if (PythonList::Check($input)) {
PythonList list(PyRefType::Borrowed, $input);
int size = list.GetSize();
int i = 0;
for (i = 0; i < size; i++) {
PythonString s = list.GetItemAtIndex(i).AsType<PythonString>();
if (!s.IsAllocated()) {
$1 = 0;
}
}
} else {
$1 = (($input == Py_None) ? 1 : 0);
}
}
%typemap(freearg) char** {
free((char *) $1);
}
%typemap(out) char** {
int len;
int i;
len = 0;
while ($1[len])
len++;
PythonList list(len);
for (i = 0; i < len; i++)
list.SetItemAtIndex(i, PythonString($1[i]));
$result = list.release();
}
%typemap(in) lldb::tid_t {
PythonObject obj = Retain<PythonObject>($input);
lldb::tid_t value = unwrapOrSetPythonException(As<unsigned long long>(obj));
if (PyErr_Occurred())
return nullptr;
$1 = value;
}
%typemap(in) lldb::StateType {
PythonObject obj = Retain<PythonObject>($input);
unsigned long long state_type_value =
unwrapOrSetPythonException(As<unsigned long long>(obj));
if (PyErr_Occurred())
return nullptr;
if (state_type_value > lldb::StateType::kLastStateType) {
PyErr_SetString(PyExc_ValueError, "Not a valid StateType value");
return nullptr;
}
$1 = static_cast<lldb::StateType>(state_type_value);
}
/* Typemap definitions to allow SWIG to properly handle char buffer. */
// typemap for a char buffer
%typemap(in) (char *dst, size_t dst_len) {
if (!PyInt_Check($input)) {
PyErr_SetString(PyExc_ValueError, "Expecting an integer");
return NULL;
}
$2 = PyInt_AsLong($input);
if ($2 <= 0) {
PyErr_SetString(PyExc_ValueError, "Positive integer expected");
return NULL;
}
$1 = (char *)malloc($2);
}
// SBProcess::ReadCStringFromMemory() uses a void*, but needs to be treated
// as char data instead of byte data.
%typemap(in) (void *char_buf, size_t size) = (char *dst, size_t dst_len);
// Return the char buffer, discarding any previous return result.
%typemap(argout) (char *dst, size_t dst_len) {
Py_XDECREF($result); /* Blow away any previous result */
if (result == 0) {
PythonString string("");
$result = string.release();
Py_INCREF($result);
} else {
llvm::StringRef ref(static_cast<const char *>($1), result);
PythonString string(ref);
$result = string.release();
}
free($1);
}
// SBProcess::ReadCStringFromMemory() uses a void*, but needs to be treated
// as char data instead of byte data.
%typemap(argout) (void *char_buf, size_t size) = (char *dst, size_t dst_len);
// typemap for handling an snprintf-like API such as SBThread::GetStopDescription.
%typemap(in) (char *dst_or_null, size_t dst_len) {
if (!PyInt_Check($input)) {
PyErr_SetString(PyExc_ValueError, "Expecting an integer");
return NULL;
}
$2 = PyInt_AsLong($input);
if ($2 <= 0) {
PyErr_SetString(PyExc_ValueError, "Positive integer expected");
return NULL;
}
$1 = (char *)malloc($2);
}
%typemap(argout) (char *dst_or_null, size_t dst_len) {
Py_XDECREF($result); /* Blow away any previous result */
llvm::StringRef ref($1);
PythonString string(ref);
$result = string.release();
free($1);
}
// typemap for an outgoing buffer
// See also SBEvent::SBEvent(uint32_t event, const char *cstr, uint32_t cstr_len).
// Ditto for SBProcess::PutSTDIN(const char *src, size_t src_len).
%typemap(in) (const char *cstr, uint32_t cstr_len),
(const char *src, size_t src_len) {
if (PythonString::Check($input)) {
PythonString str(PyRefType::Borrowed, $input);
$1 = (char *)str.GetString().data();
$2 = str.GetSize();
} else if (PythonByteArray::Check($input)) {
PythonByteArray bytearray(PyRefType::Borrowed, $input);
$1 = (char *)bytearray.GetBytes().data();
$2 = bytearray.GetSize();
} else if (PythonBytes::Check($input)) {
PythonBytes bytes(PyRefType::Borrowed, $input);
$1 = (char *)bytes.GetBytes().data();
$2 = bytes.GetSize();
} else {
PyErr_SetString(PyExc_ValueError, "Expecting a string");
return NULL;
}
}
// For SBProcess::WriteMemory, SBTarget::GetInstructions and SBDebugger::DispatchInput.
%typemap(in) (const void *buf, size_t size),
(const void *data, size_t data_len) {
if (PythonString::Check($input)) {
PythonString str(PyRefType::Borrowed, $input);
$1 = (void *)str.GetString().data();
$2 = str.GetSize();
} else if (PythonByteArray::Check($input)) {
PythonByteArray bytearray(PyRefType::Borrowed, $input);
$1 = (void *)bytearray.GetBytes().data();
$2 = bytearray.GetSize();
} else if (PythonBytes::Check($input)) {
PythonBytes bytes(PyRefType::Borrowed, $input);
$1 = (void *)bytes.GetBytes().data();
$2 = bytes.GetSize();
} else {
PyErr_SetString(PyExc_ValueError, "Expecting a buffer");
return NULL;
}
}
// typemap for an incoming buffer
// See also SBProcess::ReadMemory.
%typemap(in) (void *buf, size_t size) {
if (PyInt_Check($input)) {
$2 = PyInt_AsLong($input);
} else if (PyLong_Check($input)) {
$2 = PyLong_AsLong($input);
} else {
PyErr_SetString(PyExc_ValueError, "Expecting an integer or long object");
return NULL;
}
if ($2 <= 0) {
PyErr_SetString(PyExc_ValueError, "Positive integer expected");
return NULL;
}
$1 = (void *)malloc($2);
}
// Return the buffer, discarding any previous return result.
// See also SBProcess::ReadMemory.
%typemap(argout) (void *buf, size_t size) {
Py_XDECREF($result); /* Blow away any previous result */
if (result == 0) {
$result = Py_None;
Py_INCREF($result);
} else {
PythonBytes bytes(static_cast<const uint8_t *>($1), result);
$result = bytes.release();
}
free($1);
}
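// Illustrative usage from Python (addresses and sizes assumed): with the two
// typemaps above, SBProcess::ReadMemory takes just a size and hands back the
// bytes that were read:
//   error = lldb.SBError()
//   content = process.ReadMemory(0x100000000, 16, error)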
%{
namespace {
template <class T>
T PyLongAsT(PyObject *obj) {
static_assert(true, "unsupported type");
}
template <> uint64_t PyLongAsT<uint64_t>(PyObject *obj) {
return static_cast<uint64_t>(PyLong_AsUnsignedLongLong(obj));
}
template <> uint32_t PyLongAsT<uint32_t>(PyObject *obj) {
return static_cast<uint32_t>(PyLong_AsUnsignedLong(obj));
}
template <> int64_t PyLongAsT<int64_t>(PyObject *obj) {
return static_cast<int64_t>(PyLong_AsLongLong(obj));
}
template <> int32_t PyLongAsT<int32_t>(PyObject *obj) {
return static_cast<int32_t>(PyLong_AsLong(obj));
}
template <class T> bool SetNumberFromPyObject(T &number, PyObject *obj) {
if (PyInt_Check(obj))
number = static_cast<T>(PyInt_AsLong(obj));
else if (PyLong_Check(obj))
number = PyLongAsT<T>(obj);
else
return false;
return true;
}
template <> bool SetNumberFromPyObject<double>(double &number, PyObject *obj) {
if (PyFloat_Check(obj)) {
number = PyFloat_AsDouble(obj);
return true;
}
return false;
}
} // namespace
%}
// these typemaps allow Python users to pass list objects
// and have them turn into C++ arrays (this is useful, for instance
// when creating SBData objects from lists of numbers)
%typemap(in) (uint64_t* array, size_t array_len),
(uint32_t* array, size_t array_len),
(int64_t* array, size_t array_len),
(int32_t* array, size_t array_len),
(double* array, size_t array_len) {
/* Check if it is a list */
if (PyList_Check($input)) {
int size = PyList_Size($input);
int i = 0;
$2 = size;
$1 = ($1_type)malloc(size * sizeof($*1_type));
for (i = 0; i < size; i++) {
PyObject *o = PyList_GetItem($input, i);
if (!SetNumberFromPyObject($1[i], o)) {
PyErr_SetString(PyExc_TypeError, "list must contain numbers");
free($1);
return NULL;
}
if (PyErr_Occurred()) {
free($1);
return NULL;
}
}
} else if ($input == Py_None) {
$1 = NULL;
$2 = 0;
} else {
PyErr_SetString(PyExc_TypeError, "not a list");
return NULL;
}
}
%typemap(freearg) (uint64_t* array, size_t array_len),
(uint32_t* array, size_t array_len),
(int64_t* array, size_t array_len),
(int32_t* array, size_t array_len),
(double* array, size_t array_len) {
free($1);
}
// these typemaps convert SBModule::GetVersion() from requiring a memory buffer
// to the more Pythonic style where a list is returned and no previous
// allocation is necessary - this will break if more than 50 versions are ever
// returned
%typemap(typecheck) (uint32_t *versions, uint32_t num_versions) {
$1 = ($input == Py_None ? 1 : 0);
}
%typemap(in, numinputs=0) (uint32_t *versions) {
$1 = (uint32_t *)malloc(sizeof(uint32_t) * 50);
}
%typemap(in, numinputs=0) (uint32_t num_versions) {
$1 = 50;
}
%typemap(argout) (uint32_t *versions, uint32_t num_versions) {
uint32_t count = result;
if (count >= $2)
count = $2;
PyObject *list = PyList_New(count);
for (uint32_t j = 0; j < count; j++) {
PyObject *item = PyInt_FromLong($1[j]);
int ok = PyList_SetItem(list, j, item);
if (ok != 0) {
$result = Py_None;
break;
}
}
$result = list;
}
%typemap(freearg) (uint32_t *versions) {
free($1);
}
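// Illustrative usage from Python: with the typemaps above, the buffer and
// count parameters vanish from the signature and a list comes back, e.g.
//   versions = module.GetVersion()   # e.g. [1200, 2, 0]; at most 50 entries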
// For Log::LogOutputCallback
%typemap(in) (lldb::LogOutputCallback log_callback, void *baton) {
if (!($input == Py_None ||
PyCallable_Check(reinterpret_cast<PyObject *>($input)))) {
PyErr_SetString(PyExc_TypeError, "Need a callable object or None!");
return NULL;
}
// FIXME (filcab): We can't currently check if our callback is already
// LLDBSwigPythonCallPythonLogOutputCallback (to DECREF the previous
// baton) nor can we just remove all traces of a callback, if we want to
// revert to a file logging mechanism.
// Don't lose the callback reference
Py_INCREF($input);
$1 = LLDBSwigPythonCallPythonLogOutputCallback;
$2 = $input;
}
%typemap(typecheck) (lldb::LogOutputCallback log_callback, void *baton) {
$1 = $input == Py_None;
$1 = $1 || PyCallable_Check(reinterpret_cast<PyObject *>($input));
}
%typemap(in) lldb::FileSP {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp = unwrapOrSetPythonException(py_file.ConvertToFile());
if (!sp)
return nullptr;
$1 = sp;
}
%typemap(in) lldb::FileSP FORCE_IO_METHODS {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp = unwrapOrSetPythonException(
py_file.ConvertToFileForcingUseOfScriptingIOMethods());
if (!sp)
return nullptr;
$1 = sp;
}
%typemap(in) lldb::FileSP BORROWED {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp =
unwrapOrSetPythonException(py_file.ConvertToFile(/*borrowed=*/true));
if (!sp)
return nullptr;
$1 = sp;
}
%typemap(in) lldb::FileSP BORROWED_FORCE_IO_METHODS {
PythonFile py_file(PyRefType::Borrowed, $input);
if (!py_file) {
PyErr_SetString(PyExc_TypeError, "not a file");
return nullptr;
}
auto sp = unwrapOrSetPythonException(
py_file.ConvertToFileForcingUseOfScriptingIOMethods(/*borrowed=*/true));
if (!sp)
return nullptr;
$1 = sp;
}
%typecheck(SWIG_TYPECHECK_POINTER) lldb::FileSP {
if (PythonFile::Check($input)) {
$1 = 1;
} else {
PyErr_Clear();
$1 = 0;
}
}
%typemap(out) lldb::FileSP {
$result = nullptr;
- lldb::FileSP &sp = $1;
+ const lldb::FileSP &sp = $1;
if (sp) {
PythonFile pyfile = unwrapOrSetPythonException(PythonFile::FromFile(*sp));
if (!pyfile.IsValid())
return nullptr;
$result = pyfile.release();
}
if (!$result) {
$result = Py_None;
Py_INCREF(Py_None);
}
}
%typemap(in) (const char* string, int len) {
if ($input == Py_None) {
$1 = NULL;
$2 = 0;
} else if (PythonString::Check($input)) {
PythonString py_str(PyRefType::Borrowed, $input);
llvm::StringRef str = py_str.GetString();
$1 = const_cast<char *>(str.data());
$2 = str.size();
// In Python 2, if $input is a PyUnicode object then this
// will trigger a Unicode -> String conversion, in which
// case the `PythonString` will now own the PyString. Thus
// if it goes out of scope, the data will be deleted. The
// only way to avoid this is to leak the Python object in
// that case. Note that if there was no conversion, then
// releasing the string will not leak anything, since we
// created this as a borrowed reference.
py_str.release();
} else {
PyErr_SetString(PyExc_TypeError, "not a string-like object");
return NULL;
}
}
// These two pybuffer macros are copied out of swig/Lib/python/pybuffer.i,
// and fixed so they will not crash if PyObject_GetBuffer fails.
// https://github.com/swig/swig/issues/1640
//
// I've also moved the call to PyBuffer_Release to the end of the SWIG wrapper,
// doing it right away is not legal according to the python buffer protocol.
%define %pybuffer_mutable_binary(TYPEMAP, SIZE)
%typemap(in) (TYPEMAP, SIZE) (Py_buffer_RAII view) {
int res;
Py_ssize_t size = 0;
void *buf = 0;
res = PyObject_GetBuffer($input, &view.buffer, PyBUF_WRITABLE);
if (res < 0) {
PyErr_Clear();
%argument_fail(res, "(TYPEMAP, SIZE)", $symname, $argnum);
}
size = view.buffer.len;
buf = view.buffer.buf;
$1 = ($1_ltype)buf;
$2 = ($2_ltype)(size / sizeof($*1_type));
}
%enddef
%define %pybuffer_binary(TYPEMAP, SIZE)
%typemap(in) (TYPEMAP, SIZE) (Py_buffer_RAII view) {
int res;
Py_ssize_t size = 0;
const void *buf = 0;
res = PyObject_GetBuffer($input, &view.buffer, PyBUF_CONTIG_RO);
if (res < 0) {
PyErr_Clear();
%argument_fail(res, "(TYPEMAP, SIZE)", $symname, $argnum);
}
size = view.buffer.len;
buf = view.buffer.buf;
$1 = ($1_ltype)buf;
$2 = ($2_ltype)(size / sizeof($*1_type));
}
%enddef
%pybuffer_binary(const uint8_t *buf, size_t num_bytes);
%pybuffer_mutable_binary(uint8_t *buf, size_t num_bytes);
diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h
index 244d328b51f4..aa45aeeec476 100644
--- a/lldb/include/lldb/API/SBType.h
+++ b/lldb/include/lldb/API/SBType.h
@@ -1,271 +1,273 @@
//===-- SBType.h ------------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_API_SBTYPE_H
#define LLDB_API_SBTYPE_H
#include "lldb/API/SBDefines.h"
namespace lldb {
class SBTypeList;
class LLDB_API SBTypeMember {
public:
SBTypeMember();
SBTypeMember(const lldb::SBTypeMember &rhs);
~SBTypeMember();
lldb::SBTypeMember &operator=(const lldb::SBTypeMember &rhs);
explicit operator bool() const;
bool IsValid() const;
const char *GetName();
lldb::SBType GetType();
uint64_t GetOffsetInBytes();
uint64_t GetOffsetInBits();
bool IsBitfield();
uint32_t GetBitfieldSizeInBits();
bool GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level);
protected:
friend class SBType;
void reset(lldb_private::TypeMemberImpl *);
lldb_private::TypeMemberImpl &ref();
const lldb_private::TypeMemberImpl &ref() const;
std::unique_ptr<lldb_private::TypeMemberImpl> m_opaque_up;
};
class SBTypeMemberFunction {
public:
SBTypeMemberFunction();
SBTypeMemberFunction(const lldb::SBTypeMemberFunction &rhs);
~SBTypeMemberFunction();
lldb::SBTypeMemberFunction &operator=(const lldb::SBTypeMemberFunction &rhs);
explicit operator bool() const;
bool IsValid() const;
const char *GetName();
const char *GetDemangledName();
const char *GetMangledName();
lldb::SBType GetType();
lldb::SBType GetReturnType();
uint32_t GetNumberOfArguments();
lldb::SBType GetArgumentTypeAtIndex(uint32_t);
lldb::MemberFunctionKind GetKind();
bool GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level);
protected:
friend class SBType;
void reset(lldb_private::TypeMemberFunctionImpl *);
lldb_private::TypeMemberFunctionImpl &ref();
const lldb_private::TypeMemberFunctionImpl &ref() const;
lldb::TypeMemberFunctionImplSP m_opaque_sp;
};
class SBType {
public:
SBType();
SBType(const lldb::SBType &rhs);
~SBType();
explicit operator bool() const;
bool IsValid() const;
uint64_t GetByteSize();
bool IsPointerType();
bool IsReferenceType();
bool IsFunctionType();
bool IsPolymorphicClass();
bool IsArrayType();
bool IsVectorType();
bool IsTypedefType();
bool IsAnonymousType();
bool IsScopedEnumerationType();
bool IsAggregateType();
lldb::SBType GetPointerType();
lldb::SBType GetPointeeType();
lldb::SBType GetReferenceType();
lldb::SBType GetTypedefedType();
lldb::SBType GetDereferencedType();
lldb::SBType GetUnqualifiedType();
lldb::SBType GetArrayElementType();
lldb::SBType GetArrayType(uint64_t size);
lldb::SBType GetVectorElementType();
lldb::SBType GetCanonicalType();
lldb::SBType GetEnumerationIntegerType();
// Get the "lldb::BasicType" enumeration for a type. If a type is not a basic
// type eBasicTypeInvalid will be returned
lldb::BasicType GetBasicType();
// The call below is confusing and should really be renamed to "CreateBasicType".
lldb::SBType GetBasicType(lldb::BasicType type);
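// Illustrative usage (hypothetical): given an existing SBType t,
//   lldb::SBType int_t = t.GetBasicType(lldb::eBasicTypeInt);
// retrieves the "int" type from the same type system as t.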
uint32_t GetNumberOfFields();
uint32_t GetNumberOfDirectBaseClasses();
uint32_t GetNumberOfVirtualBaseClasses();
lldb::SBTypeMember GetFieldAtIndex(uint32_t idx);
lldb::SBTypeMember GetDirectBaseClassAtIndex(uint32_t idx);
lldb::SBTypeMember GetVirtualBaseClassAtIndex(uint32_t idx);
lldb::SBTypeEnumMemberList GetEnumMembers();
uint32_t GetNumberOfTemplateArguments();
lldb::SBType GetTemplateArgumentType(uint32_t idx);
+ /// Return the TemplateArgumentKind of the template argument at index idx.
+ /// Variadic argument packs are automatically expanded.
lldb::TemplateArgumentKind GetTemplateArgumentKind(uint32_t idx);
lldb::SBType GetFunctionReturnType();
lldb::SBTypeList GetFunctionArgumentTypes();
uint32_t GetNumberOfMemberFunctions();
lldb::SBTypeMemberFunction GetMemberFunctionAtIndex(uint32_t idx);
lldb::SBModule GetModule();
const char *GetName();
const char *GetDisplayTypeName();
lldb::TypeClass GetTypeClass();
bool IsTypeComplete();
uint32_t GetTypeFlags();
bool GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level);
lldb::SBType &operator=(const lldb::SBType &rhs);
bool operator==(lldb::SBType &rhs);
bool operator!=(lldb::SBType &rhs);
protected:
lldb_private::TypeImpl &ref();
const lldb_private::TypeImpl &ref() const;
lldb::TypeImplSP GetSP();
void SetSP(const lldb::TypeImplSP &type_impl_sp);
lldb::TypeImplSP m_opaque_sp;
friend class SBFunction;
friend class SBModule;
friend class SBTarget;
friend class SBTypeEnumMember;
friend class SBTypeEnumMemberList;
friend class SBTypeNameSpecifier;
friend class SBTypeMember;
friend class SBTypeMemberFunction;
friend class SBTypeList;
friend class SBValue;
SBType(const lldb_private::CompilerType &);
SBType(const lldb::TypeSP &);
SBType(const lldb::TypeImplSP &);
};
class SBTypeList {
public:
SBTypeList();
SBTypeList(const lldb::SBTypeList &rhs);
~SBTypeList();
lldb::SBTypeList &operator=(const lldb::SBTypeList &rhs);
explicit operator bool() const;
bool IsValid();
void Append(lldb::SBType type);
lldb::SBType GetTypeAtIndex(uint32_t index);
uint32_t GetSize();
private:
std::unique_ptr<lldb_private::TypeListImpl> m_opaque_up;
friend class SBModule;
friend class SBCompileUnit;
};
} // namespace lldb
#endif // LLDB_API_SBTYPE_H
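// A minimal sketch (not part of the patch), showing how a client might walk
// the template arguments of an SBType through the API declared above. Per the
// new doc comment, variadic packs are expanded automatically, so a type such
// as std::tuple<int, float> reports two arguments. DumpTemplateArguments and
// the tuple example are illustrative assumptions, not code from this change.
#include <cstdio>
#include "lldb/API/SBType.h"
static void DumpTemplateArguments(lldb::SBType type) {
  const uint32_t n = type.GetNumberOfTemplateArguments();
  for (uint32_t i = 0; i < n; ++i) {
    // The kind tells us whether this argument is a type, an integral value,
    // or something else; only type arguments have a printable SBType here.
    if (type.GetTemplateArgumentKind(i) == lldb::eTemplateArgumentKindType)
      printf("arg %u: %s\n", i, type.GetTemplateArgumentType(i).GetName());
  }
}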
diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h
index 0ad05a27570e..aefd19d0a859 100644
--- a/lldb/include/lldb/Symbol/CompilerType.h
+++ b/lldb/include/lldb/Symbol/CompilerType.h
@@ -1,422 +1,436 @@
//===-- CompilerType.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SYMBOL_COMPILERTYPE_H
#define LLDB_SYMBOL_COMPILERTYPE_H
#include <functional>
#include <string>
#include <vector>
#include "lldb/lldb-private.h"
#include "llvm/ADT/APSInt.h"
namespace lldb_private {
class DataExtractor;
/// Generic representation of a type in a programming language.
///
/// This class serves as an abstraction for a type inside one of the TypeSystems
/// implemented by the language plugins. It does not have any actual logic in it
/// but only stores an opaque pointer and a pointer to the TypeSystem that
/// gives meaning to this opaque pointer. All methods of this class should call
/// their respective method in the TypeSystem interface and pass the opaque
/// pointer along.
///
/// \see lldb_private::TypeSystem
class CompilerType {
public:
/// Creates a CompilerType with the given TypeSystem and opaque compiler type.
///
/// This constructor should only be called from the respective TypeSystem
/// implementation.
///
/// \see lldb_private::TypeSystemClang::GetType(clang::QualType)
CompilerType(TypeSystem *type_system, lldb::opaque_compiler_type_t type)
: m_type(type), m_type_system(type_system) {
assert(Verify() && "verification failed");
}
CompilerType(const CompilerType &rhs)
: m_type(rhs.m_type), m_type_system(rhs.m_type_system) {}
CompilerType() = default;
/// Operators.
/// \{
const CompilerType &operator=(const CompilerType &rhs) {
m_type = rhs.m_type;
m_type_system = rhs.m_type_system;
return *this;
}
bool operator<(const CompilerType &rhs) const {
if (m_type_system == rhs.m_type_system)
return m_type < rhs.m_type;
return m_type_system < rhs.m_type_system;
}
/// \}
/// Tests.
/// \{
explicit operator bool() const {
return m_type != nullptr && m_type_system != nullptr;
}
bool IsValid() const { return m_type != nullptr && m_type_system != nullptr; }
bool IsArrayType(CompilerType *element_type = nullptr,
uint64_t *size = nullptr,
bool *is_incomplete = nullptr) const;
bool IsVectorType(CompilerType *element_type = nullptr,
uint64_t *size = nullptr) const;
bool IsArrayOfScalarType() const;
bool IsAggregateType() const;
bool IsAnonymousType() const;
bool IsScopedEnumerationType() const;
bool IsBeingDefined() const;
bool IsCharType() const;
bool IsCompleteType() const;
bool IsConst() const;
bool IsCStringType(uint32_t &length) const;
bool IsDefined() const;
bool IsFloatingPointType(uint32_t &count, bool &is_complex) const;
bool IsFunctionType() const;
uint32_t IsHomogeneousAggregate(CompilerType *base_type_ptr) const;
size_t GetNumberOfFunctionArguments() const;
CompilerType GetFunctionArgumentAtIndex(const size_t index) const;
bool IsVariadicFunctionType() const;
bool IsFunctionPointerType() const;
bool
IsBlockPointerType(CompilerType *function_pointer_type_ptr = nullptr) const;
bool IsIntegerType(bool &is_signed) const;
bool IsEnumerationType(bool &is_signed) const;
bool IsIntegerOrEnumerationType(bool &is_signed) const;
bool IsPolymorphicClass() const;
/// \param target_type Can pass nullptr.
bool IsPossibleDynamicType(CompilerType *target_type, bool check_cplusplus,
bool check_objc) const;
bool IsPointerToScalarType() const;
bool IsRuntimeGeneratedType() const;
bool IsPointerType(CompilerType *pointee_type = nullptr) const;
bool IsPointerOrReferenceType(CompilerType *pointee_type = nullptr) const;
bool IsReferenceType(CompilerType *pointee_type = nullptr,
bool *is_rvalue = nullptr) const;
bool ShouldTreatScalarValueAsAddress() const;
bool IsScalarType() const;
bool IsTypedefType() const;
bool IsVoidType() const;
/// \}
/// Type Completion.
/// \{
bool GetCompleteType() const;
/// \}
/// AST related queries.
/// \{
size_t GetPointerByteSize() const;
/// \}
/// Accessors.
/// \{
TypeSystem *GetTypeSystem() const { return m_type_system; }
ConstString GetTypeName() const;
ConstString GetDisplayTypeName() const;
uint32_t
GetTypeInfo(CompilerType *pointee_or_element_compiler_type = nullptr) const;
lldb::LanguageType GetMinimumLanguage();
lldb::opaque_compiler_type_t GetOpaqueQualType() const { return m_type; }
lldb::TypeClass GetTypeClass() const;
void SetCompilerType(TypeSystem *type_system,
lldb::opaque_compiler_type_t type);
unsigned GetTypeQualifiers() const;
/// \}
/// Creating related types.
/// \{
CompilerType GetArrayElementType(ExecutionContextScope *exe_scope) const;
CompilerType GetArrayType(uint64_t size) const;
CompilerType GetCanonicalType() const;
CompilerType GetFullyUnqualifiedType() const;
CompilerType GetEnumerationIntegerType() const;
/// Returns -1 if this isn't a function or if the function doesn't
/// have a prototype. Returns a value >= 0 if there is a prototype.
int GetFunctionArgumentCount() const;
CompilerType GetFunctionArgumentTypeAtIndex(size_t idx) const;
CompilerType GetFunctionReturnType() const;
size_t GetNumMemberFunctions() const;
TypeMemberFunctionImpl GetMemberFunctionAtIndex(size_t idx);
/// If this type is a reference to a type (L value or R value reference),
/// return a new type with the reference removed, else return the current type
/// itself.
CompilerType GetNonReferenceType() const;
/// If this type is a pointer type, return the type that the pointer points
/// to, else return an invalid type.
CompilerType GetPointeeType() const;
/// Return a new CompilerType that is a pointer to this type
CompilerType GetPointerType() const;
/// Return a new CompilerType that is an L value reference to this type if this
/// type is valid and the type system supports L value references, else return
/// an invalid type.
CompilerType GetLValueReferenceType() const;
/// Return a new CompilerType that is an R value reference to this type if this
/// type is valid and the type system supports R value references, else return
/// an invalid type.
CompilerType GetRValueReferenceType() const;
/// Return a new CompilerType that adds a const modifier to this type if this
/// type is valid and the type system supports const modifiers, else return an
/// invalid type.
CompilerType AddConstModifier() const;
/// Return a new CompilerType that adds a volatile modifier to this type if
/// this type is valid and the type system supports volatile modifiers, else
/// return an invalid type.
CompilerType AddVolatileModifier() const;
/// Return a new CompilerType that is the atomic type of this type. If this
/// type is not valid or the type system doesn't support atomic types, this
/// returns an invalid type.
CompilerType GetAtomicType() const;
/// Return a new CompilerType that adds a restrict modifier to this type if
/// this type is valid and the type system supports restrict modifiers, else
/// return an invalid type.
CompilerType AddRestrictModifier() const;
/// Create a typedef to this type using "name" as the name of the typedef this
/// type is valid and the type system supports typedefs, else return an
/// invalid type.
/// \param payload The typesystem-specific \p lldb::Type payload.
CompilerType CreateTypedef(const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t payload) const;
/// If the current object represents a typedef type, get the underlying type
CompilerType GetTypedefedType() const;
/// Create related types using the current type's AST
CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) const;
/// \}
/// Exploring the type.
/// \{
struct IntegralTemplateArgument;
/// Return the size of the type in bytes.
llvm::Optional<uint64_t> GetByteSize(ExecutionContextScope *exe_scope) const;
/// Return the size of the type in bits.
llvm::Optional<uint64_t> GetBitSize(ExecutionContextScope *exe_scope) const;
lldb::Encoding GetEncoding(uint64_t &count) const;
lldb::Format GetFormat() const;
llvm::Optional<size_t>
GetTypeBitAlign(ExecutionContextScope *exe_scope) const;
uint32_t GetNumChildren(bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) const;
lldb::BasicType GetBasicTypeEnumeration() const;
static lldb::BasicType GetBasicTypeEnumeration(ConstString name);
/// If this type is an enumeration, iterate through all of its enumerators
/// using a callback. If the callback returns true, keep iterating, else abort
/// the iteration.
void ForEachEnumerator(
std::function<bool(const CompilerType &integer_type, ConstString name,
const llvm::APSInt &value)> const &callback) const;
uint32_t GetNumFields() const;
CompilerType GetFieldAtIndex(size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) const;
uint32_t GetNumDirectBaseClasses() const;
uint32_t GetNumVirtualBaseClasses() const;
CompilerType GetDirectBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const;
CompilerType GetVirtualBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const;
uint32_t GetIndexOfFieldWithName(const char *name,
CompilerType *field_compiler_type = nullptr,
uint64_t *bit_offset_ptr = nullptr,
uint32_t *bitfield_bit_size_ptr = nullptr,
bool *is_bitfield_ptr = nullptr) const;
CompilerType GetChildCompilerTypeAtIndex(
ExecutionContext *exe_ctx, size_t idx, bool transparent_pointers,
bool omit_empty_base_classes, bool ignore_array_bounds,
std::string &child_name, uint32_t &child_byte_size,
int32_t &child_byte_offset, uint32_t &child_bitfield_bit_size,
uint32_t &child_bitfield_bit_offset, bool &child_is_base_class,
bool &child_is_deref_of_parent, ValueObject *valobj,
uint64_t &language_flags) const;
/// Lookup a child given a name. This function will match base class names and
/// member names in "clang_type" only, not descendants.
uint32_t GetIndexOfChildWithName(const char *name,
bool omit_empty_base_classes) const;
/// Lookup a child member given a name. This function will match member names
/// only and will descend into "clang_type" children in search for the first
/// member in this class, or any base class that matches "name".
/// TODO: Return all matches for a given name by returning a
/// vector<vector<uint32_t>>
/// so we catch all names that match a given child name, not just the first.
size_t
GetIndexOfChildMemberWithName(const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) const;
- size_t GetNumTemplateArguments() const;
-
- lldb::TemplateArgumentKind GetTemplateArgumentKind(size_t idx) const;
- CompilerType GetTypeTemplateArgument(size_t idx) const;
+ /// Return the number of template arguments the type has.
+ /// If expand_pack is true, then variadic argument packs are automatically
+ /// expanded to their supplied arguments. If it is false, an argument pack
+ /// will only count as 1 argument.
+ size_t GetNumTemplateArguments(bool expand_pack = false) const;
+
+ /// Return the TemplateArgumentKind of the template argument at index idx.
+ /// If expand_pack is true, then variadic argument packs are automatically
+ /// expanded to their supplied arguments. With expand_pack set to false, an
+ /// argument pack will count as 1 argument and the returned kind will be Pack.
+ lldb::TemplateArgumentKind
+ GetTemplateArgumentKind(size_t idx, bool expand_pack = false) const;
+ CompilerType GetTypeTemplateArgument(size_t idx,
+ bool expand_pack = false) const;
/// Returns the value of the template argument and its type.
+ /// If expand_pack is true, then variadic argument packs are automatically
+ /// expanded to their supplied arguments. With expand_pack set to false, an
+ /// argument pack will count as 1 argument and it is invalid to call this
+ /// method on the pack argument.
llvm::Optional<IntegralTemplateArgument>
- GetIntegralTemplateArgument(size_t idx) const;
+ GetIntegralTemplateArgument(size_t idx, bool expand_pack = false) const;
CompilerType GetTypeForFormatters() const;
LazyBool ShouldPrintAsOneLiner(ValueObject *valobj) const;
bool IsMeaninglessWithoutDynamicResolution() const;
/// \}
/// Dumping types.
/// \{
#ifndef NDEBUG
/// Convenience LLVM-style dump method for use in the debugger only.
/// Don't call this function from actual code.
LLVM_DUMP_METHOD void dump() const;
#endif
void DumpValue(ExecutionContext *exe_ctx, Stream *s, lldb::Format format,
const DataExtractor &data, lldb::offset_t data_offset,
size_t data_byte_size, uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth);
bool DumpTypeValue(Stream *s, lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope);
void DumpSummary(ExecutionContext *exe_ctx, Stream *s,
const DataExtractor &data, lldb::offset_t data_offset,
size_t data_byte_size);
/// Dump to stdout.
void DumpTypeDescription(lldb::DescriptionLevel level =
lldb::eDescriptionLevelFull) const;
/// Print a description of the type to a stream. The exact implementation
/// varies, but the expectation is that eDescriptionLevelFull returns a
/// source-like representation of the type, whereas eDescriptionLevelVerbose
/// does a dump of the underlying AST if applicable.
void DumpTypeDescription(Stream *s, lldb::DescriptionLevel level =
lldb::eDescriptionLevelFull) const;
/// \}
bool GetValueAsScalar(const DataExtractor &data, lldb::offset_t data_offset,
size_t data_byte_size, Scalar &value,
ExecutionContextScope *exe_scope) const;
void Clear() {
m_type = nullptr;
m_type_system = nullptr;
}
private:
#ifndef NDEBUG
/// If the type is valid, ask the TypeSystem to verify the integrity
/// of the type to catch CompilerTypes that mix and match invalid
/// TypeSystem/Opaque type pairs.
bool Verify() const;
#endif
lldb::opaque_compiler_type_t m_type = nullptr;
TypeSystem *m_type_system = nullptr;
};
bool operator==(const CompilerType &lhs, const CompilerType &rhs);
bool operator!=(const CompilerType &lhs, const CompilerType &rhs);
struct CompilerType::IntegralTemplateArgument {
llvm::APSInt value;
CompilerType type;
};
} // namespace lldb_private
#endif // LLDB_SYMBOL_COMPILERTYPE_H
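// A minimal sketch (not part of the patch) of the expand_pack semantics
// documented above. The CompilerType value is assumed to describe a variadic
// instantiation such as Foo<int, char, bool>; the function name is
// illustrative only.
#include "lldb/Symbol/CompilerType.h"
static void CompareArgumentCounts(const lldb_private::CompilerType &type) {
  // With expand_pack=false a parameter pack counts as a single argument of
  // kind Pack; with expand_pack=true each element of the pack is counted
  // individually, so for variadic instantiations expanded >= collapsed.
  const size_t collapsed = type.GetNumTemplateArguments(/*expand_pack=*/false);
  const size_t expanded = type.GetNumTemplateArguments(/*expand_pack=*/true);
  (void)collapsed;
  (void)expanded;
}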
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index be5783596897..769449a4933b 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -1,553 +1,557 @@
//===-- TypeSystem.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SYMBOL_TYPESYSTEM_H
#define LLDB_SYMBOL_TYPESYSTEM_H
#include <functional>
#include <map>
#include <mutex>
#include <string>
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "lldb/Core/PluginInterface.h"
#include "lldb/Expression/Expression.h"
#include "lldb/Symbol/CompilerDecl.h"
#include "lldb/Symbol/CompilerDeclContext.h"
#include "lldb/lldb-private.h"
class DWARFDIE;
class DWARFASTParser;
class PDBASTParser;
namespace lldb_private {
/// A SmallBitVector that represents a set of source languages (\p
/// lldb::LanguageType). Each lldb::LanguageType is represented by
/// the bit with the position of its enumerator. The largest
/// LanguageType is < 64, so this is space-efficient and on 64-bit
/// architectures a LanguageSet can be completely stack-allocated.
struct LanguageSet {
llvm::SmallBitVector bitvector;
LanguageSet();
/// If the set contains a single language only, return it.
llvm::Optional<lldb::LanguageType> GetSingularLanguage();
void Insert(lldb::LanguageType language);
bool Empty() const;
size_t Size() const;
bool operator[](unsigned i) const;
};
/// Interface for representing a type system.
///
/// Implemented by language plugins to define the type system for a given
/// language.
///
/// This interface extensively uses opaque pointers to prevent generic LLDB
/// code from depending on language plugins. The type and semantics of
/// these opaque pointers are defined by the TypeSystem implementation inside
/// the respective language plugin. Opaque pointers from one TypeSystem
/// instance should never be passed to a different TypeSystem instance (even
/// when the language plugin for both TypeSystem instances is the same).
///
/// Most of the functions in this class should not be called directly but only
/// called by their respective counterparts in CompilerType, CompilerDecl and
/// CompilerDeclContext.
///
/// \see lldb_private::CompilerType
/// \see lldb_private::CompilerDecl
/// \see lldb_private::CompilerDeclContext
class TypeSystem : public PluginInterface {
public:
// Constructors and Destructors
~TypeSystem() override;
// LLVM RTTI support
virtual bool isA(const void *ClassID) const = 0;
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Module *module);
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Target *target);
// Free up any resources associated with this TypeSystem. Done before
// removing all the TypeSystems from the TypeSystemMap.
virtual void Finalize() {}
virtual DWARFASTParser *GetDWARFParser() { return nullptr; }
virtual PDBASTParser *GetPDBParser() { return nullptr; }
virtual SymbolFile *GetSymbolFile() const { return m_sym_file; }
virtual void SetSymbolFile(SymbolFile *sym_file) { m_sym_file = sym_file; }
// CompilerDecl functions
virtual ConstString DeclGetName(void *opaque_decl) = 0;
virtual ConstString DeclGetMangledName(void *opaque_decl);
virtual CompilerDeclContext DeclGetDeclContext(void *opaque_decl);
virtual CompilerType DeclGetFunctionReturnType(void *opaque_decl);
virtual size_t DeclGetFunctionNumArguments(void *opaque_decl);
virtual CompilerType DeclGetFunctionArgumentType(void *opaque_decl,
size_t arg_idx);
virtual CompilerType GetTypeForDecl(void *opaque_decl) = 0;
// CompilerDeclContext functions
virtual std::vector<CompilerDecl>
DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name,
const bool ignore_imported_decls);
virtual ConstString DeclContextGetName(void *opaque_decl_ctx) = 0;
virtual ConstString
DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) = 0;
virtual bool DeclContextIsClassMethod(
void *opaque_decl_ctx, lldb::LanguageType *language_ptr,
bool *is_instance_method_ptr, ConstString *language_object_name_ptr) = 0;
virtual bool DeclContextIsContainedInLookup(void *opaque_decl_ctx,
void *other_opaque_decl_ctx) = 0;
// Tests
#ifndef NDEBUG
/// Verify the integrity of the type to catch CompilerTypes that mix
/// and match invalid TypeSystem/Opaque type pairs.
virtual bool Verify(lldb::opaque_compiler_type_t type) = 0;
#endif
virtual bool IsArrayType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size,
bool *is_incomplete) = 0;
virtual bool IsAggregateType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsAnonymousType(lldb::opaque_compiler_type_t type);
virtual bool IsCharType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsCompleteType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsDefined(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsFloatingPointType(lldb::opaque_compiler_type_t type,
uint32_t &count, bool &is_complex) = 0;
virtual bool IsFunctionType(lldb::opaque_compiler_type_t type) = 0;
virtual size_t
GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type,
const size_t index) = 0;
virtual bool IsFunctionPointerType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsBlockPointerType(lldb::opaque_compiler_type_t type,
CompilerType *function_pointer_type_ptr) = 0;
virtual bool IsIntegerType(lldb::opaque_compiler_type_t type,
bool &is_signed) = 0;
virtual bool IsEnumerationType(lldb::opaque_compiler_type_t type,
bool &is_signed) {
is_signed = false;
return false;
}
virtual bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type,
CompilerType *target_type, // Can pass NULL
bool check_cplusplus, bool check_objc) = 0;
virtual bool IsPointerType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) = 0;
virtual bool IsScalarType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsVoidType(lldb::opaque_compiler_type_t type) = 0;
virtual bool CanPassInRegisters(const CompilerType &type) = 0;
// TypeSystems can support more than one language
virtual bool SupportsLanguage(lldb::LanguageType language) = 0;
// Type Completion
virtual bool GetCompleteType(lldb::opaque_compiler_type_t type) = 0;
// AST related queries
virtual uint32_t GetPointerByteSize() = 0;
// Accessors
virtual ConstString GetTypeName(lldb::opaque_compiler_type_t type) = 0;
virtual ConstString GetDisplayTypeName(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t
GetTypeInfo(lldb::opaque_compiler_type_t type,
CompilerType *pointee_or_element_compiler_type) = 0;
virtual lldb::LanguageType
GetMinimumLanguage(lldb::opaque_compiler_type_t type) = 0;
virtual lldb::TypeClass GetTypeClass(lldb::opaque_compiler_type_t type) = 0;
// Creating related types
virtual CompilerType
GetArrayElementType(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) = 0;
virtual CompilerType GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size);
virtual CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) = 0;
// Returns -1 if this isn't a function or if the function doesn't have a
// prototype. Returns a value >= 0 if there is a prototype.
virtual int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) = 0;
virtual CompilerType
GetFunctionReturnType(lldb::opaque_compiler_type_t type) = 0;
virtual size_t GetNumMemberFunctions(lldb::opaque_compiler_type_t type) = 0;
virtual TypeMemberFunctionImpl
GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, size_t idx) = 0;
virtual CompilerType GetPointeeType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType GetPointerType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetLValueReferenceType(lldb::opaque_compiler_type_t type);
virtual CompilerType
GetRValueReferenceType(lldb::opaque_compiler_type_t type);
virtual CompilerType GetAtomicType(lldb::opaque_compiler_type_t type);
virtual CompilerType AddConstModifier(lldb::opaque_compiler_type_t type);
virtual CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type);
virtual CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type);
/// \param opaque_payload The m_payload field of Type, which may
/// carry TypeSystem-specific extra information.
virtual CompilerType CreateTypedef(lldb::opaque_compiler_type_t type,
const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t opaque_payload);
// Exploring the type
virtual const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) = 0;
virtual llvm::Optional<uint64_t>
GetBitSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) = 0;
virtual lldb::Encoding GetEncoding(lldb::opaque_compiler_type_t type,
uint64_t &count) = 0;
virtual lldb::Format GetFormat(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t GetNumChildren(lldb::opaque_compiler_type_t type,
bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) = 0;
virtual CompilerType GetBuiltinTypeByName(ConstString name);
virtual lldb::BasicType
GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) = 0;
virtual void ForEachEnumerator(
lldb::opaque_compiler_type_t type,
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) {}
virtual uint32_t GetNumFields(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType GetFieldAtIndex(lldb::opaque_compiler_type_t type,
size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) = 0;
virtual uint32_t
GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t
GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type, size_t idx,
uint32_t *bit_offset_ptr) = 0;
virtual CompilerType
GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type, size_t idx,
uint32_t *bit_offset_ptr) = 0;
virtual CompilerType GetChildCompilerTypeAtIndex(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx,
bool transparent_pointers, bool omit_empty_base_classes,
bool ignore_array_bounds, std::string &child_name,
uint32_t &child_byte_size, int32_t &child_byte_offset,
uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset,
bool &child_is_base_class, bool &child_is_deref_of_parent,
ValueObject *valobj, uint64_t &language_flags) = 0;
// Lookup a child given a name. This function will match base class names and
// member names in "clang_type" only, not descendants.
virtual uint32_t GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
const char *name,
bool omit_empty_base_classes) = 0;
// Lookup a child member given a name. This function will match member names
// only and will descend into "clang_type" children in search for the first
// member in this class, or any base class that matches "name".
// TODO: Return all matches for a given name by returning a
// vector<vector<uint32_t>>
// so we catch all names that match a given child name, not just the first.
virtual size_t
GetIndexOfChildMemberWithName(lldb::opaque_compiler_type_t type,
const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) = 0;
- virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type);
+ virtual size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack);
virtual lldb::TemplateArgumentKind
- GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx);
- virtual CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx);
+ GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack);
+ virtual CompilerType
+ GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack);
virtual llvm::Optional<CompilerType::IntegralTemplateArgument>
- GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx);
+ GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack);
// Dumping types
#ifndef NDEBUG
/// Convenience LLVM-style dump method for use in the debugger only.
LLVM_DUMP_METHOD virtual void
dump(lldb::opaque_compiler_type_t type) const = 0;
#endif
virtual void DumpValue(lldb::opaque_compiler_type_t type,
ExecutionContext *exe_ctx, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth) = 0;
virtual bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) = 0;
/// Dump the type to stdout.
virtual void DumpTypeDescription(
lldb::opaque_compiler_type_t type,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0;
/// Print a description of the type to a stream. The exact implementation
/// varies, but the expectation is that eDescriptionLevelFull returns a
/// source-like representation of the type, whereas eDescriptionLevelVerbose
/// does a dump of the underlying AST if applicable.
virtual void DumpTypeDescription(
lldb::opaque_compiler_type_t type, Stream *s,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0;
/// Dump a textual representation of the internal TypeSystem state to the
/// given stream.
///
/// This should not modify the state of the TypeSystem if possible.
virtual void Dump(llvm::raw_ostream &output) = 0;
// TODO: These methods appear unused. Should they be removed?
virtual bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) = 0;
virtual void DumpSummary(lldb::opaque_compiler_type_t type,
ExecutionContext *exe_ctx, Stream *s,
const DataExtractor &data,
lldb::offset_t data_offset,
size_t data_byte_size) = 0;
// TODO: Determine if these methods should move to TypeSystemClang.
virtual bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) = 0;
virtual unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsCStringType(lldb::opaque_compiler_type_t type,
uint32_t &length) = 0;
virtual llvm::Optional<size_t>
GetTypeBitAlign(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) = 0;
virtual CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) = 0;
virtual CompilerType
GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding,
size_t bit_size) = 0;
virtual bool IsBeingDefined(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsConst(lldb::opaque_compiler_type_t type) = 0;
virtual uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
CompilerType *base_type_ptr) = 0;
virtual bool IsPolymorphicClass(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsTypedefType(lldb::opaque_compiler_type_t type) = 0;
// If the current object represents a typedef type, get the underlying type
virtual CompilerType GetTypedefedType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsVectorType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size) = 0;
virtual CompilerType
GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) = 0;
virtual CompilerType
GetNonReferenceType(lldb::opaque_compiler_type_t type) = 0;
virtual bool IsReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type, bool *is_rvalue) = 0;
virtual bool
ShouldTreatScalarValueAsAddress(lldb::opaque_compiler_type_t type) {
return IsPointerOrReferenceType(type, nullptr);
}
virtual UserExpression *
GetUserExpression(llvm::StringRef expr, llvm::StringRef prefix,
lldb::LanguageType language,
Expression::ResultType desired_type,
const EvaluateExpressionOptions &options,
ValueObject *ctx_obj) {
return nullptr;
}
virtual FunctionCaller *GetFunctionCaller(const CompilerType &return_type,
const Address &function_address,
const ValueList &arg_value_list,
const char *name) {
return nullptr;
}
virtual std::unique_ptr<UtilityFunction>
CreateUtilityFunction(std::string text, std::string name);
virtual PersistentExpressionState *GetPersistentExpressionState() {
return nullptr;
}
virtual CompilerType GetTypeForFormatters(void *type);
virtual LazyBool ShouldPrintAsOneLiner(void *type, ValueObject *valobj);
  // Type systems can have types that are placeholder types, which are meant to
  // indicate the presence of a type, but offer no actual information about
  // said types, and leave the burden of actually figuring type information out
  // to dynamic type resolution. For instance, a language with a generics
  // system can use placeholder types to indicate "type argument goes here",
  // without promising uniqueness of the placeholder, nor attaching any
  // actually identifiable information to said placeholder. This API allows type
  // systems to tell LLDB when such a type has been encountered. In response,
  // the debugger can react by not using this type as a cache entry in any
  // type-specific way. For instance, LLDB will currently not cache any
  // formatters that are discovered on such a type as attributable to the
  // meaningless type itself, instead preferring to use the dynamic type.
protected:
SymbolFile *m_sym_file = nullptr;
};
class TypeSystemMap {
public:
TypeSystemMap();
~TypeSystemMap();
// Clear calls Finalize on all the TypeSystems managed by this map, and then
// empties the map.
void Clear();
// Iterate through all of the type systems that are created. Return true from
// callback to keep iterating, false to stop iterating.
void ForEach(std::function<bool(TypeSystem *)> const &callback);
llvm::Expected<TypeSystem &>
GetTypeSystemForLanguage(lldb::LanguageType language, Module *module,
bool can_create);
llvm::Expected<TypeSystem &>
GetTypeSystemForLanguage(lldb::LanguageType language, Target *target,
bool can_create);
protected:
typedef std::map<lldb::LanguageType, lldb::TypeSystemSP> collection;
mutable std::mutex m_mutex; ///< A mutex to keep this object happy in
///< multi-threaded environments.
collection m_map;
bool m_clear_in_progress = false;
private:
typedef llvm::function_ref<lldb::TypeSystemSP()> CreateCallback;
/// Finds the type system for the given language. If no type system could be
/// found for a language and a CreateCallback was provided, the value returned
/// by the callback will be treated as the TypeSystem for the language.
///
/// \param language The language for which the type system should be found.
/// \param create_callback A callback that will be called if no previously
/// created TypeSystem that fits the given language
/// could be found. Can be omitted if a non-existent
/// type system should be treated as an error instead.
/// \return The found type system or an error.
llvm::Expected<TypeSystem &> GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback = llvm::None);
};
} // namespace lldb_private
#endif // LLDB_SYMBOL_TYPESYSTEM_H
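// A minimal sketch (not part of the patch) of the LanguageSet helper declared
// above: a stack-allocated bitvector keyed by lldb::LanguageType. The function
// name is illustrative only.
#include "lldb/Symbol/TypeSystem.h"
static bool DescribesCFamily(lldb_private::LanguageSet &set) {
  set.Insert(lldb::eLanguageTypeC99);
  set.Insert(lldb::eLanguageTypeC_plus_plus);
  // operator[] tests whether the bit for the given language is set.
  return !set.Empty() && set[lldb::eLanguageTypeC99];
}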
diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp
index 533930c0544b..adc60a084367 100644
--- a/lldb/source/API/SBType.cpp
+++ b/lldb/source/API/SBType.cpp
@@ -1,893 +1,897 @@
//===-- SBType.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/API/SBType.h"
#include "lldb/API/SBDefines.h"
#include "lldb/API/SBModule.h"
#include "lldb/API/SBStream.h"
#include "lldb/API/SBTypeEnumMember.h"
#include "lldb/Core/Mangled.h"
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Symbol/Type.h"
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Instrumentation.h"
#include "lldb/Utility/Stream.h"
#include "llvm/ADT/APSInt.h"
#include <memory>
using namespace lldb;
using namespace lldb_private;
SBType::SBType() { LLDB_INSTRUMENT_VA(this); }
SBType::SBType(const CompilerType &type)
: m_opaque_sp(new TypeImpl(
CompilerType(type.GetTypeSystem(), type.GetOpaqueQualType()))) {}
SBType::SBType(const lldb::TypeSP &type_sp)
: m_opaque_sp(new TypeImpl(type_sp)) {}
SBType::SBType(const lldb::TypeImplSP &type_impl_sp)
: m_opaque_sp(type_impl_sp) {}
SBType::SBType(const SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
m_opaque_sp = rhs.m_opaque_sp;
}
}
// SBType::SBType (TypeImpl* impl) :
// m_opaque_up(impl)
//{}
//
bool SBType::operator==(SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (!IsValid())
return !rhs.IsValid();
if (!rhs.IsValid())
return false;
return *m_opaque_sp.get() == *rhs.m_opaque_sp.get();
}
bool SBType::operator!=(SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (!IsValid())
return rhs.IsValid();
if (!rhs.IsValid())
return true;
return *m_opaque_sp.get() != *rhs.m_opaque_sp.get();
}
lldb::TypeImplSP SBType::GetSP() { return m_opaque_sp; }
void SBType::SetSP(const lldb::TypeImplSP &type_impl_sp) {
m_opaque_sp = type_impl_sp;
}
SBType &SBType::operator=(const SBType &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
m_opaque_sp = rhs.m_opaque_sp;
}
return *this;
}
SBType::~SBType() = default;
TypeImpl &SBType::ref() {
if (m_opaque_sp.get() == nullptr)
m_opaque_sp = std::make_shared<TypeImpl>();
return *m_opaque_sp;
}
const TypeImpl &SBType::ref() const {
// "const SBAddress &addr" should already have checked "addr.IsValid()" prior
// to calling this function. In case you didn't we will assert and die to let
// you know.
assert(m_opaque_sp.get());
return *m_opaque_sp;
}
bool SBType::IsValid() const {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBType::operator bool() const {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp.get() == nullptr)
return false;
return m_opaque_sp->IsValid();
}
uint64_t SBType::GetByteSize() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
if (llvm::Optional<uint64_t> size =
m_opaque_sp->GetCompilerType(false).GetByteSize(nullptr))
return *size;
return 0;
}
bool SBType::IsPointerType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsPointerType();
}
bool SBType::IsArrayType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsArrayType(nullptr, nullptr,
nullptr);
}
bool SBType::IsVectorType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsVectorType(nullptr, nullptr);
}
bool SBType::IsReferenceType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsReferenceType();
}
SBType SBType::GetPointerType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetPointerType())));
}
SBType SBType::GetPointeeType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetPointeeType())));
}
SBType SBType::GetReferenceType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetReferenceType())));
}
SBType SBType::GetTypedefedType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetTypedefedType())));
}
SBType SBType::GetDereferencedType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetDereferencedType())));
}
SBType SBType::GetArrayElementType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(
m_opaque_sp->GetCompilerType(true).GetArrayElementType(nullptr))));
}
SBType SBType::GetArrayType(uint64_t size) {
LLDB_INSTRUMENT_VA(this, size);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(
new TypeImpl(m_opaque_sp->GetCompilerType(true).GetArrayType(size))));
}
SBType SBType::GetVectorElementType() {
LLDB_INSTRUMENT_VA(this);
SBType type_sb;
if (IsValid()) {
CompilerType vector_element_type;
if (m_opaque_sp->GetCompilerType(true).IsVectorType(&vector_element_type,
nullptr))
type_sb.SetSP(TypeImplSP(new TypeImpl(vector_element_type)));
}
return type_sb;
}
bool SBType::IsFunctionType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsFunctionType();
}
bool SBType::IsPolymorphicClass() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsPolymorphicClass();
}
bool SBType::IsTypedefType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsTypedefType();
}
bool SBType::IsAnonymousType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsAnonymousType();
}
bool SBType::IsScopedEnumerationType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsScopedEnumerationType();
}
bool SBType::IsAggregateType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(true).IsAggregateType();
}
lldb::SBType SBType::GetFunctionReturnType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid()) {
CompilerType return_type(
m_opaque_sp->GetCompilerType(true).GetFunctionReturnType());
if (return_type.IsValid())
return SBType(return_type);
}
return lldb::SBType();
}
lldb::SBTypeList SBType::GetFunctionArgumentTypes() {
LLDB_INSTRUMENT_VA(this);
SBTypeList sb_type_list;
if (IsValid()) {
CompilerType func_type(m_opaque_sp->GetCompilerType(true));
size_t count = func_type.GetNumberOfFunctionArguments();
for (size_t i = 0; i < count; i++) {
sb_type_list.Append(SBType(func_type.GetFunctionArgumentAtIndex(i)));
}
}
return sb_type_list;
}
uint32_t SBType::GetNumberOfMemberFunctions() {
LLDB_INSTRUMENT_VA(this);
if (IsValid()) {
return m_opaque_sp->GetCompilerType(true).GetNumMemberFunctions();
}
return 0;
}
lldb::SBTypeMemberFunction SBType::GetMemberFunctionAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMemberFunction sb_func_type;
if (IsValid())
sb_func_type.reset(new TypeMemberFunctionImpl(
m_opaque_sp->GetCompilerType(true).GetMemberFunctionAtIndex(idx)));
return sb_func_type;
}
lldb::SBType SBType::GetUnqualifiedType() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return SBType();
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetUnqualifiedType())));
}
lldb::SBType SBType::GetCanonicalType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetCanonicalType())));
return SBType();
}
SBType SBType::GetEnumerationIntegerType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid()) {
return SBType(
m_opaque_sp->GetCompilerType(true).GetEnumerationIntegerType());
}
return SBType();
}
lldb::BasicType SBType::GetBasicType() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(false).GetBasicTypeEnumeration();
return eBasicTypeInvalid;
}
SBType SBType::GetBasicType(lldb::BasicType basic_type) {
LLDB_INSTRUMENT_VA(this, basic_type);
if (IsValid() && m_opaque_sp->IsValid())
return SBType(
m_opaque_sp->GetTypeSystem(false)->GetBasicTypeFromAST(basic_type));
return SBType();
}
uint32_t SBType::GetNumberOfDirectBaseClasses() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetNumDirectBaseClasses();
return 0;
}
uint32_t SBType::GetNumberOfVirtualBaseClasses() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetNumVirtualBaseClasses();
return 0;
}
uint32_t SBType::GetNumberOfFields() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetNumFields();
return 0;
}
bool SBType::GetDescription(SBStream &description,
lldb::DescriptionLevel description_level) {
LLDB_INSTRUMENT_VA(this, description, description_level);
Stream &strm = description.ref();
if (m_opaque_sp) {
m_opaque_sp->GetDescription(strm, description_level);
} else
strm.PutCString("No value");
return true;
}
SBTypeMember SBType::GetDirectBaseClassAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMember sb_type_member;
if (IsValid()) {
uint32_t bit_offset = 0;
CompilerType base_class_type =
m_opaque_sp->GetCompilerType(true).GetDirectBaseClassAtIndex(
idx, &bit_offset);
if (base_class_type.IsValid())
sb_type_member.reset(new TypeMemberImpl(
TypeImplSP(new TypeImpl(base_class_type)), bit_offset));
}
return sb_type_member;
}
SBTypeMember SBType::GetVirtualBaseClassAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMember sb_type_member;
if (IsValid()) {
uint32_t bit_offset = 0;
CompilerType base_class_type =
m_opaque_sp->GetCompilerType(true).GetVirtualBaseClassAtIndex(
idx, &bit_offset);
if (base_class_type.IsValid())
sb_type_member.reset(new TypeMemberImpl(
TypeImplSP(new TypeImpl(base_class_type)), bit_offset));
}
return sb_type_member;
}
SBTypeEnumMemberList SBType::GetEnumMembers() {
LLDB_INSTRUMENT_VA(this);
SBTypeEnumMemberList sb_enum_member_list;
if (IsValid()) {
CompilerType this_type(m_opaque_sp->GetCompilerType(true));
if (this_type.IsValid()) {
this_type.ForEachEnumerator([&sb_enum_member_list](
const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value) -> bool {
SBTypeEnumMember enum_member(
lldb::TypeEnumMemberImplSP(new TypeEnumMemberImpl(
lldb::TypeImplSP(new TypeImpl(integer_type)), name, value)));
sb_enum_member_list.Append(enum_member);
return true; // Keep iterating
});
}
}
return sb_enum_member_list;
}
SBTypeMember SBType::GetFieldAtIndex(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
SBTypeMember sb_type_member;
if (IsValid()) {
CompilerType this_type(m_opaque_sp->GetCompilerType(false));
if (this_type.IsValid()) {
uint64_t bit_offset = 0;
uint32_t bitfield_bit_size = 0;
bool is_bitfield = false;
std::string name_sstr;
CompilerType field_type(this_type.GetFieldAtIndex(
idx, name_sstr, &bit_offset, &bitfield_bit_size, &is_bitfield));
if (field_type.IsValid()) {
ConstString name;
if (!name_sstr.empty())
name.SetCString(name_sstr.c_str());
sb_type_member.reset(
new TypeMemberImpl(TypeImplSP(new TypeImpl(field_type)), bit_offset,
name, bitfield_bit_size, is_bitfield));
}
}
}
return sb_type_member;
}
bool SBType::IsTypeComplete() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return false;
return m_opaque_sp->GetCompilerType(false).IsCompleteType();
}
uint32_t SBType::GetTypeFlags() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return 0;
return m_opaque_sp->GetCompilerType(true).GetTypeInfo();
}
lldb::SBModule SBType::GetModule() {
LLDB_INSTRUMENT_VA(this);
lldb::SBModule sb_module;
if (!IsValid())
return sb_module;
sb_module.SetSP(m_opaque_sp->GetModule());
return sb_module;
}
const char *SBType::GetName() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return "";
return m_opaque_sp->GetName().GetCString();
}
const char *SBType::GetDisplayTypeName() {
LLDB_INSTRUMENT_VA(this);
if (!IsValid())
return "";
return m_opaque_sp->GetDisplayTypeName().GetCString();
}
lldb::TypeClass SBType::GetTypeClass() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
return m_opaque_sp->GetCompilerType(true).GetTypeClass();
return lldb::eTypeClassInvalid;
}
uint32_t SBType::GetNumberOfTemplateArguments() {
LLDB_INSTRUMENT_VA(this);
if (IsValid())
- return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments();
+ return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments(
+ /*expand_pack=*/true);
return 0;
}
lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
if (!IsValid())
return SBType();
CompilerType type;
+ const bool expand_pack = true;
switch(GetTemplateArgumentKind(idx)) {
case eTemplateArgumentKindType:
- type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(idx);
+ type = m_opaque_sp->GetCompilerType(false).GetTypeTemplateArgument(
+ idx, expand_pack);
break;
case eTemplateArgumentKindIntegral:
type = m_opaque_sp->GetCompilerType(false)
- .GetIntegralTemplateArgument(idx)
+ .GetIntegralTemplateArgument(idx, expand_pack)
->type;
break;
default:
break;
}
if (type.IsValid())
return SBType(type);
return SBType();
}
lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) {
LLDB_INSTRUMENT_VA(this, idx);
if (IsValid())
- return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx);
+ return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(
+ idx, /*expand_pack=*/true);
return eTemplateArgumentKindNull;
}
SBTypeList::SBTypeList() : m_opaque_up(new TypeListImpl()) {
LLDB_INSTRUMENT_VA(this);
}
SBTypeList::SBTypeList(const SBTypeList &rhs)
: m_opaque_up(new TypeListImpl()) {
LLDB_INSTRUMENT_VA(this, rhs);
for (uint32_t i = 0, rhs_size = const_cast<SBTypeList &>(rhs).GetSize();
i < rhs_size; i++)
Append(const_cast<SBTypeList &>(rhs).GetTypeAtIndex(i));
}
bool SBTypeList::IsValid() {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBTypeList::operator bool() const {
LLDB_INSTRUMENT_VA(this);
return (m_opaque_up != nullptr);
}
SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
m_opaque_up = std::make_unique<TypeListImpl>();
for (uint32_t i = 0, rhs_size = const_cast<SBTypeList &>(rhs).GetSize();
i < rhs_size; i++)
Append(const_cast<SBTypeList &>(rhs).GetTypeAtIndex(i));
}
return *this;
}
void SBTypeList::Append(SBType type) {
LLDB_INSTRUMENT_VA(this, type);
if (type.IsValid())
m_opaque_up->Append(type.m_opaque_sp);
}
SBType SBTypeList::GetTypeAtIndex(uint32_t index) {
LLDB_INSTRUMENT_VA(this, index);
if (m_opaque_up)
return SBType(m_opaque_up->GetTypeAtIndex(index));
return SBType();
}
uint32_t SBTypeList::GetSize() {
LLDB_INSTRUMENT_VA(this);
return m_opaque_up->GetSize();
}
SBTypeList::~SBTypeList() = default;
SBTypeMember::SBTypeMember() { LLDB_INSTRUMENT_VA(this); }
SBTypeMember::~SBTypeMember() = default;
SBTypeMember::SBTypeMember(const SBTypeMember &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
if (rhs.IsValid())
m_opaque_up = std::make_unique<TypeMemberImpl>(rhs.ref());
}
}
lldb::SBTypeMember &SBTypeMember::operator=(const lldb::SBTypeMember &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs) {
if (rhs.IsValid())
m_opaque_up = std::make_unique<TypeMemberImpl>(rhs.ref());
}
return *this;
}
bool SBTypeMember::IsValid() const {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBTypeMember::operator bool() const {
LLDB_INSTRUMENT_VA(this);
return m_opaque_up.get();
}
const char *SBTypeMember::GetName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetName().GetCString();
return nullptr;
}
SBType SBTypeMember::GetType() {
LLDB_INSTRUMENT_VA(this);
SBType sb_type;
if (m_opaque_up) {
sb_type.SetSP(m_opaque_up->GetTypeImpl());
}
return sb_type;
}
uint64_t SBTypeMember::GetOffsetInBytes() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetBitOffset() / 8u;
return 0;
}
uint64_t SBTypeMember::GetOffsetInBits() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetBitOffset();
return 0;
}
bool SBTypeMember::IsBitfield() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetIsBitfield();
return false;
}
uint32_t SBTypeMember::GetBitfieldSizeInBits() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_up)
return m_opaque_up->GetBitfieldBitSize();
return 0;
}
bool SBTypeMember::GetDescription(lldb::SBStream &description,
lldb::DescriptionLevel description_level) {
LLDB_INSTRUMENT_VA(this, description, description_level);
Stream &strm = description.ref();
if (m_opaque_up) {
const uint32_t bit_offset = m_opaque_up->GetBitOffset();
const uint32_t byte_offset = bit_offset / 8u;
const uint32_t byte_bit_offset = bit_offset % 8u;
const char *name = m_opaque_up->GetName().GetCString();
if (byte_bit_offset)
strm.Printf("+%u + %u bits: (", byte_offset, byte_bit_offset);
else
strm.Printf("+%u: (", byte_offset);
TypeImplSP type_impl_sp(m_opaque_up->GetTypeImpl());
if (type_impl_sp)
type_impl_sp->GetDescription(strm, description_level);
strm.Printf(") %s", name);
if (m_opaque_up->GetIsBitfield()) {
const uint32_t bitfield_bit_size = m_opaque_up->GetBitfieldBitSize();
strm.Printf(" : %u", bitfield_bit_size);
}
} else {
strm.PutCString("No value");
}
return true;
}
void SBTypeMember::reset(TypeMemberImpl *type_member_impl) {
m_opaque_up.reset(type_member_impl);
}
TypeMemberImpl &SBTypeMember::ref() {
if (m_opaque_up == nullptr)
m_opaque_up = std::make_unique<TypeMemberImpl>();
return *m_opaque_up;
}
const TypeMemberImpl &SBTypeMember::ref() const { return *m_opaque_up; }
SBTypeMemberFunction::SBTypeMemberFunction() { LLDB_INSTRUMENT_VA(this); }
SBTypeMemberFunction::~SBTypeMemberFunction() = default;
SBTypeMemberFunction::SBTypeMemberFunction(const SBTypeMemberFunction &rhs)
: m_opaque_sp(rhs.m_opaque_sp) {
LLDB_INSTRUMENT_VA(this, rhs);
}
lldb::SBTypeMemberFunction &SBTypeMemberFunction::
operator=(const lldb::SBTypeMemberFunction &rhs) {
LLDB_INSTRUMENT_VA(this, rhs);
if (this != &rhs)
m_opaque_sp = rhs.m_opaque_sp;
return *this;
}
bool SBTypeMemberFunction::IsValid() const {
LLDB_INSTRUMENT_VA(this);
return this->operator bool();
}
SBTypeMemberFunction::operator bool() const {
LLDB_INSTRUMENT_VA(this);
return m_opaque_sp.get();
}
const char *SBTypeMemberFunction::GetName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetName().GetCString();
return nullptr;
}
const char *SBTypeMemberFunction::GetDemangledName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp) {
ConstString mangled_str = m_opaque_sp->GetMangledName();
if (mangled_str) {
Mangled mangled(mangled_str);
return mangled.GetDemangledName().GetCString();
}
}
return nullptr;
}
const char *SBTypeMemberFunction::GetMangledName() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetMangledName().GetCString();
return nullptr;
}
SBType SBTypeMemberFunction::GetType() {
LLDB_INSTRUMENT_VA(this);
SBType sb_type;
if (m_opaque_sp) {
sb_type.SetSP(lldb::TypeImplSP(new TypeImpl(m_opaque_sp->GetType())));
}
return sb_type;
}
lldb::SBType SBTypeMemberFunction::GetReturnType() {
LLDB_INSTRUMENT_VA(this);
SBType sb_type;
if (m_opaque_sp) {
sb_type.SetSP(lldb::TypeImplSP(new TypeImpl(m_opaque_sp->GetReturnType())));
}
return sb_type;
}
uint32_t SBTypeMemberFunction::GetNumberOfArguments() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetNumArguments();
return 0;
}
lldb::SBType SBTypeMemberFunction::GetArgumentTypeAtIndex(uint32_t i) {
LLDB_INSTRUMENT_VA(this, i);
SBType sb_type;
if (m_opaque_sp) {
sb_type.SetSP(
lldb::TypeImplSP(new TypeImpl(m_opaque_sp->GetArgumentAtIndex(i))));
}
return sb_type;
}
lldb::MemberFunctionKind SBTypeMemberFunction::GetKind() {
LLDB_INSTRUMENT_VA(this);
if (m_opaque_sp)
return m_opaque_sp->GetKind();
return lldb::eMemberFunctionKindUnknown;
}
bool SBTypeMemberFunction::GetDescription(
lldb::SBStream &description, lldb::DescriptionLevel description_level) {
LLDB_INSTRUMENT_VA(this, description, description_level);
Stream &strm = description.ref();
if (m_opaque_sp)
return m_opaque_sp->GetDescription(strm);
return false;
}
void SBTypeMemberFunction::reset(TypeMemberFunctionImpl *type_member_impl) {
m_opaque_sp.reset(type_member_impl);
}
TypeMemberFunctionImpl &SBTypeMemberFunction::ref() {
if (!m_opaque_sp)
m_opaque_sp = std::make_shared<TypeMemberFunctionImpl>();
return *m_opaque_sp.get();
}
const TypeMemberFunctionImpl &SBTypeMemberFunction::ref() const {
return *m_opaque_sp.get();
}
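// A usage sketch (not part of the patch): per the operator== definition above,
// two invalid SBType instances compare equal, since the left-hand side returns
// !rhs.IsValid() when it is itself invalid. The function name is illustrative.
#include "lldb/API/SBType.h"
static bool InvalidTypesCompareEqual() {
  lldb::SBType a, b; // default-constructed, hence invalid
  return a == b;     // true: both sides are invalid
}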
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
index c6eb693bba6b..a1ebe5830bb9 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp
@@ -1,9928 +1,9971 @@
//===-- TypeSystemClang.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "TypeSystemClang.h"
#include "llvm/Support/FormatAdapters.h"
#include "llvm/Support/FormatVariadic.h"
#include <mutex>
#include <string>
#include <vector>
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTImporter.h"
#include "clang/AST/Attr.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclObjC.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/AST/Mangle.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Type.h"
#include "clang/AST/VTableBuilder.h"
#include "clang/Basic/Builtins.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemOptions.h"
#include "clang/Basic/LangStandard.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Frontend/FrontendOptions.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/ModuleMap.h"
#include "clang/Sema/Sema.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Threading.h"
#include "Plugins/ExpressionParser/Clang/ClangASTImporter.h"
#include "Plugins/ExpressionParser/Clang/ClangASTMetadata.h"
#include "Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h"
#include "Plugins/ExpressionParser/Clang/ClangFunctionCaller.h"
#include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
#include "Plugins/ExpressionParser/Clang/ClangUserExpression.h"
#include "Plugins/ExpressionParser/Clang/ClangUtil.h"
#include "Plugins/ExpressionParser/Clang/ClangUtilityFunction.h"
#include "lldb/Core/DumpDataExtractor.h"
#include "lldb/Core/Module.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Core/StreamFile.h"
#include "lldb/Core/ThreadSafeDenseMap.h"
#include "lldb/Core/UniqueCStringMap.h"
#include "lldb/Symbol/ObjectFile.h"
#include "lldb/Symbol/SymbolFile.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Language.h"
#include "lldb/Target/Process.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/ArchSpec.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Flags.h"
#include "lldb/Utility/LLDBAssert.h"
#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/RegularExpression.h"
#include "lldb/Utility/Scalar.h"
#include "Plugins/LanguageRuntime/ObjC/ObjCLanguageRuntime.h"
#include "Plugins/SymbolFile/DWARF/DWARFASTParserClang.h"
#include "Plugins/SymbolFile/PDB/PDBASTParser.h"
#include <cstdio>
#include <mutex>
using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::dwarf;
using namespace clang;
using llvm::StringSwitch;
LLDB_PLUGIN_DEFINE(TypeSystemClang)
namespace {
static void VerifyDecl(clang::Decl *decl) {
assert(decl && "VerifyDecl called with nullptr?");
#ifndef NDEBUG
// We don't care about the actual access value here but only want to trigger
// that Clang calls its internal Decl::AccessDeclContextCheck validation.
decl->getAccess();
#endif
}
static inline bool
TypeSystemClangSupportsLanguage(lldb::LanguageType language) {
return language == eLanguageTypeUnknown || // Clang is the default type system
lldb_private::Language::LanguageIsC(language) ||
lldb_private::Language::LanguageIsCPlusPlus(language) ||
lldb_private::Language::LanguageIsObjC(language) ||
lldb_private::Language::LanguageIsPascal(language) ||
// Use Clang for Rust until there is a proper language plugin for it
language == eLanguageTypeRust ||
language == eLanguageTypeExtRenderScript ||
// Use Clang for D until there is a proper language plugin for it
language == eLanguageTypeD ||
// Open Dylan compiler debug info is designed to be Clang-compatible
language == eLanguageTypeDylan;
}
// Checks whether m1 is an overload of m2 (as opposed to an override). This is
// called by addOverridesForMethod to distinguish overrides (which share a
// vtable entry) from overloads (which require distinct entries).
bool isOverload(clang::CXXMethodDecl *m1, clang::CXXMethodDecl *m2) {
// FIXME: This should detect covariant return types, but currently doesn't.
lldbassert(&m1->getASTContext() == &m2->getASTContext() &&
"Methods should have the same AST context");
clang::ASTContext &context = m1->getASTContext();
const auto *m1Type = llvm::cast<clang::FunctionProtoType>(
context.getCanonicalType(m1->getType()));
const auto *m2Type = llvm::cast<clang::FunctionProtoType>(
context.getCanonicalType(m2->getType()));
auto compareArgTypes = [&context](const clang::QualType &m1p,
const clang::QualType &m2p) {
return context.hasSameType(m1p.getUnqualifiedType(),
m2p.getUnqualifiedType());
};
// FIXME: In C++14 and later, we can just pass m2Type->param_type_end()
// as a fourth parameter to std::equal().
return (m1->getNumParams() != m2->getNumParams()) ||
!std::equal(m1Type->param_type_begin(), m1Type->param_type_end(),
m2Type->param_type_begin(), compareArgTypes);
}
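// For illustration:
//   struct Base { virtual void f(int); };
//   struct Derived : Base {
//     void f(int);    // same parameter types: isOverload(...) is false (an override)
//     void f(double); // different parameter types: isOverload(...) is true (an overload)
//   };
// Only the override shares Base::f's vtable entry; the overload gets its own.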
// If decl is a virtual method, walk the base classes looking for methods that
// decl overrides. This table of overridden methods is used by IRGen to
// determine the vtable layout for decl's parent class.
void addOverridesForMethod(clang::CXXMethodDecl *decl) {
if (!decl->isVirtual())
return;
clang::CXXBasePaths paths;
llvm::SmallVector<clang::NamedDecl *, 4> decls;
auto find_overridden_methods =
[&decls, decl](const clang::CXXBaseSpecifier *specifier,
clang::CXXBasePath &path) {
if (auto *base_record = llvm::dyn_cast<clang::CXXRecordDecl>(
specifier->getType()->castAs<clang::RecordType>()->getDecl())) {
clang::DeclarationName name = decl->getDeclName();
// If this is a destructor, check whether the base class destructor is
// virtual.
if (name.getNameKind() == clang::DeclarationName::CXXDestructorName)
if (auto *baseDtorDecl = base_record->getDestructor()) {
if (baseDtorDecl->isVirtual()) {
decls.push_back(baseDtorDecl);
return true;
} else
return false;
}
// Otherwise, search for name in the base class.
for (path.Decls = base_record->lookup(name).begin();
path.Decls != path.Decls.end(); ++path.Decls) {
if (auto *method_decl =
llvm::dyn_cast<clang::CXXMethodDecl>(*path.Decls))
if (method_decl->isVirtual() && !isOverload(decl, method_decl)) {
decls.push_back(method_decl);
return true;
}
}
}
return false;
};
if (decl->getParent()->lookupInBases(find_overridden_methods, paths)) {
for (auto *overridden_decl : decls)
decl->addOverriddenMethod(
llvm::cast<clang::CXXMethodDecl>(overridden_decl));
}
}
}
static lldb::addr_t GetVTableAddress(Process &process,
VTableContextBase &vtable_ctx,
ValueObject &valobj,
const ASTRecordLayout &record_layout) {
// Retrieve type info
CompilerType pointee_type;
CompilerType this_type(valobj.GetCompilerType());
uint32_t type_info = this_type.GetTypeInfo(&pointee_type);
if (!type_info)
return LLDB_INVALID_ADDRESS;
// Check if it's a pointer or reference
bool ptr_or_ref = false;
if (type_info & (eTypeIsPointer | eTypeIsReference)) {
ptr_or_ref = true;
type_info = pointee_type.GetTypeInfo();
}
// We process only C++ classes
const uint32_t cpp_class = eTypeIsClass | eTypeIsCPlusPlus;
if ((type_info & cpp_class) != cpp_class)
return LLDB_INVALID_ADDRESS;
// Calculate offset to VTable pointer
lldb::offset_t vbtable_ptr_offset =
vtable_ctx.isMicrosoft() ? record_layout.getVBPtrOffset().getQuantity()
: 0;
if (ptr_or_ref) {
// We have a pointer / ref to the object, so read the
// vtable pointer from process memory
if (valobj.GetAddressTypeOfChildren() != eAddressTypeLoad)
return LLDB_INVALID_ADDRESS;
auto vbtable_ptr_addr = valobj.GetValueAsUnsigned(LLDB_INVALID_ADDRESS);
if (vbtable_ptr_addr == LLDB_INVALID_ADDRESS)
return LLDB_INVALID_ADDRESS;
vbtable_ptr_addr += vbtable_ptr_offset;
Status err;
return process.ReadPointerFromMemory(vbtable_ptr_addr, err);
}
// We have an object already read from process memory,
// so just extract VTable pointer from it
DataExtractor data;
Status err;
auto size = valobj.GetData(data, err);
if (err.Fail() || vbtable_ptr_offset + data.GetAddressByteSize() > size)
return LLDB_INVALID_ADDRESS;
return data.GetAddress(&vbtable_ptr_offset);
}
static int64_t ReadVBaseOffsetFromVTable(Process &process,
VTableContextBase &vtable_ctx,
lldb::addr_t vtable_ptr,
const CXXRecordDecl *cxx_record_decl,
const CXXRecordDecl *base_class_decl) {
if (vtable_ctx.isMicrosoft()) {
clang::MicrosoftVTableContext &msoft_vtable_ctx =
static_cast<clang::MicrosoftVTableContext &>(vtable_ctx);
// Get the index into the virtual base table. The index is measured in
// 32-bit (uint32_t) entries from vbtable_ptr
const unsigned vbtable_index =
msoft_vtable_ctx.getVBTableIndex(cxx_record_decl, base_class_decl);
const lldb::addr_t base_offset_addr = vtable_ptr + vbtable_index * 4;
Status err;
return process.ReadSignedIntegerFromMemory(base_offset_addr, 4, INT64_MAX,
err);
}
clang::ItaniumVTableContext &itanium_vtable_ctx =
static_cast<clang::ItaniumVTableContext &>(vtable_ctx);
clang::CharUnits base_offset_offset =
itanium_vtable_ctx.getVirtualBaseOffsetOffset(cxx_record_decl,
base_class_decl);
const lldb::addr_t base_offset_addr =
vtable_ptr + base_offset_offset.getQuantity();
const uint32_t base_offset_size = process.GetAddressByteSize();
Status err;
return process.ReadSignedIntegerFromMemory(base_offset_addr, base_offset_size,
INT64_MAX, err);
}
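// For illustration of the two ABIs handled above: in the Microsoft ABI the
// vbtable is an array of 32-bit signed offsets, so the entry is read from
// vtable_ptr + vbtable_index * 4; in the Itanium ABI virtual base offsets
// live in pointer-sized slots at (typically negative) offsets from the
// vtable address point, hence the read of GetAddressByteSize() bytes.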
static bool GetVBaseBitOffset(VTableContextBase &vtable_ctx,
ValueObject &valobj,
const ASTRecordLayout &record_layout,
const CXXRecordDecl *cxx_record_decl,
const CXXRecordDecl *base_class_decl,
int32_t &bit_offset) {
ExecutionContext exe_ctx(valobj.GetExecutionContextRef());
Process *process = exe_ctx.GetProcessPtr();
if (!process)
return false;
lldb::addr_t vtable_ptr =
GetVTableAddress(*process, vtable_ctx, valobj, record_layout);
if (vtable_ptr == LLDB_INVALID_ADDRESS)
return false;
auto base_offset = ReadVBaseOffsetFromVTable(
*process, vtable_ctx, vtable_ptr, cxx_record_decl, base_class_decl);
if (base_offset == INT64_MAX)
return false;
bit_offset = base_offset * 8;
return true;
}
typedef lldb_private::ThreadSafeDenseMap<clang::ASTContext *, TypeSystemClang *>
ClangASTMap;
static ClangASTMap &GetASTMap() {
static ClangASTMap *g_map_ptr = nullptr;
static llvm::once_flag g_once_flag;
llvm::call_once(g_once_flag, []() {
g_map_ptr = new ClangASTMap(); // leaked on purpose to avoid spins
});
return *g_map_ptr;
}
TypePayloadClang::TypePayloadClang(OptionalClangModuleID owning_module,
bool is_complete_objc_class)
: m_payload(owning_module.GetValue()) {
SetIsCompleteObjCClass(is_complete_objc_class);
}
void TypePayloadClang::SetOwningModule(OptionalClangModuleID id) {
assert(id.GetValue() < ObjCClassBit);
bool is_complete = IsCompleteObjCClass();
m_payload = id.GetValue();
SetIsCompleteObjCClass(is_complete);
}
static void SetMemberOwningModule(clang::Decl *member,
const clang::Decl *parent) {
if (!member || !parent)
return;
OptionalClangModuleID id(parent->getOwningModuleID());
if (!id.HasValue())
return;
member->setFromASTFile();
member->setOwningModuleID(id.GetValue());
member->setModuleOwnershipKind(clang::Decl::ModuleOwnershipKind::Visible);
if (llvm::isa<clang::NamedDecl>(member))
if (auto *dc = llvm::dyn_cast<clang::DeclContext>(parent)) {
dc->setHasExternalVisibleStorage(true);
// This triggers ExternalASTSource::FindExternalVisibleDeclsByName() to be
// called when searching for members.
dc->setHasExternalLexicalStorage(true);
}
}
char TypeSystemClang::ID;
bool TypeSystemClang::IsOperator(llvm::StringRef name,
clang::OverloadedOperatorKind &op_kind) {
// All operators have to start with "operator".
if (!name.consume_front("operator"))
return false;
// Remember if there was a space after "operator". This is necessary to
// check for collisions with strangely named functions like "operatorint()".
bool space_after_operator = name.consume_front(" ");
op_kind = StringSwitch<clang::OverloadedOperatorKind>(name)
.Case("+", clang::OO_Plus)
.Case("+=", clang::OO_PlusEqual)
.Case("++", clang::OO_PlusPlus)
.Case("-", clang::OO_Minus)
.Case("-=", clang::OO_MinusEqual)
.Case("--", clang::OO_MinusMinus)
.Case("->", clang::OO_Arrow)
.Case("->*", clang::OO_ArrowStar)
.Case("*", clang::OO_Star)
.Case("*=", clang::OO_StarEqual)
.Case("/", clang::OO_Slash)
.Case("/=", clang::OO_SlashEqual)
.Case("%", clang::OO_Percent)
.Case("%=", clang::OO_PercentEqual)
.Case("^", clang::OO_Caret)
.Case("^=", clang::OO_CaretEqual)
.Case("&", clang::OO_Amp)
.Case("&=", clang::OO_AmpEqual)
.Case("&&", clang::OO_AmpAmp)
.Case("|", clang::OO_Pipe)
.Case("|=", clang::OO_PipeEqual)
.Case("||", clang::OO_PipePipe)
.Case("~", clang::OO_Tilde)
.Case("!", clang::OO_Exclaim)
.Case("!=", clang::OO_ExclaimEqual)
.Case("=", clang::OO_Equal)
.Case("==", clang::OO_EqualEqual)
.Case("<", clang::OO_Less)
.Case("<<", clang::OO_LessLess)
.Case("<<=", clang::OO_LessLessEqual)
.Case("<=", clang::OO_LessEqual)
.Case(">", clang::OO_Greater)
.Case(">>", clang::OO_GreaterGreater)
.Case(">>=", clang::OO_GreaterGreaterEqual)
.Case(">=", clang::OO_GreaterEqual)
.Case("()", clang::OO_Call)
.Case("[]", clang::OO_Subscript)
.Case(",", clang::OO_Comma)
.Default(clang::NUM_OVERLOADED_OPERATORS);
// We found a fitting operator, so we can exit now.
if (op_kind != clang::NUM_OVERLOADED_OPERATORS)
return true;
// After the "operator " or "operator" part is something unknown. This means
// it's either one of the named operators (new/delete), a conversion operator
// (e.g. operator bool) or a function which name starts with "operator"
// (e.g. void operatorbool).
// If it's a function that starts with operator it can't have a space after
// "operator" because identifiers can't contain spaces.
// E.g. "operator int" (conversion operator)
// vs. "operatorint" (function with colliding name).
if (!space_after_operator)
return false; // not an operator.
// Now the operator is either one of the named operators or a conversion
// operator.
op_kind = StringSwitch<clang::OverloadedOperatorKind>(name)
.Case("new", clang::OO_New)
.Case("new[]", clang::OO_Array_New)
.Case("delete", clang::OO_Delete)
.Case("delete[]", clang::OO_Array_Delete)
// conversion operators hit this case.
.Default(clang::NUM_OVERLOADED_OPERATORS);
return true;
}
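// For illustration of the return convention:
//   IsOperator("operator+=", k)    -> true, k == OO_PlusEqual
//   IsOperator("operator bool", k) -> true, k == NUM_OVERLOADED_OPERATORS
//     (a conversion operator, which has no overloaded-operator kind)
//   IsOperator("operatorint", k)   -> false (an ordinary function whose name
//     merely starts with "operator"; no space, so it cannot be an operator)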
clang::AccessSpecifier
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(AccessType access) {
switch (access) {
default:
break;
case eAccessNone:
return AS_none;
case eAccessPublic:
return AS_public;
case eAccessPrivate:
return AS_private;
case eAccessProtected:
return AS_protected;
}
return AS_none;
}
static void ParseLangArgs(LangOptions &Opts, InputKind IK, const char *triple) {
// FIXME: Cleanup per-file based stuff.
// Set some properties which depend solely on the input kind; it would be
// nice to move these to the language standard, and have the driver resolve
// the input kind + language standard.
if (IK.getLanguage() == clang::Language::Asm) {
Opts.AsmPreprocessor = 1;
} else if (IK.isObjectiveC()) {
Opts.ObjC = 1;
}
LangStandard::Kind LangStd = LangStandard::lang_unspecified;
if (LangStd == LangStandard::lang_unspecified) {
// Based on the base language, pick one.
switch (IK.getLanguage()) {
case clang::Language::Unknown:
case clang::Language::LLVM_IR:
case clang::Language::RenderScript:
llvm_unreachable("Invalid input kind!");
case clang::Language::OpenCL:
LangStd = LangStandard::lang_opencl10;
break;
case clang::Language::OpenCLCXX:
LangStd = LangStandard::lang_openclcpp10;
break;
case clang::Language::CUDA:
LangStd = LangStandard::lang_cuda;
break;
case clang::Language::Asm:
case clang::Language::C:
case clang::Language::ObjC:
LangStd = LangStandard::lang_gnu99;
break;
case clang::Language::CXX:
case clang::Language::ObjCXX:
LangStd = LangStandard::lang_gnucxx98;
break;
case clang::Language::HIP:
LangStd = LangStandard::lang_hip;
break;
case clang::Language::HLSL:
LangStd = LangStandard::lang_hlsl;
break;
}
}
const LangStandard &Std = LangStandard::getLangStandardForKind(LangStd);
Opts.LineComment = Std.hasLineComments();
Opts.C99 = Std.isC99();
Opts.CPlusPlus = Std.isCPlusPlus();
Opts.CPlusPlus11 = Std.isCPlusPlus11();
Opts.Digraphs = Std.hasDigraphs();
Opts.GNUMode = Std.isGNUMode();
Opts.GNUInline = !Std.isC99();
Opts.HexFloats = Std.hasHexFloats();
Opts.WChar = true;
// OpenCL has some additional defaults.
if (LangStd == LangStandard::lang_opencl10) {
Opts.OpenCL = 1;
Opts.AltiVec = 1;
Opts.CXXOperatorNames = 1;
Opts.setLaxVectorConversions(LangOptions::LaxVectorConversionKind::All);
}
// OpenCL and C++ both have bool, true, false keywords.
Opts.Bool = Opts.OpenCL || Opts.CPlusPlus;
Opts.setValueVisibilityMode(DefaultVisibility);
// Mimicking gcc's behavior, trigraphs are only enabled if -trigraphs is
// specified, or -std is set to a conforming mode.
Opts.Trigraphs = !Opts.GNUMode;
Opts.CharIsSigned = ArchSpec(triple).CharIsSignedByDefault();
Opts.OptimizeSize = 0;
// FIXME: Eliminate this dependency.
// unsigned Opt =
// Args.hasArg(OPT_Os) ? 2 : getLastArgIntValue(Args, OPT_O, 0, Diags);
// Opts.Optimize = Opt != 0;
unsigned Opt = 0;
// This is the __NO_INLINE__ define, which just depends on things like the
// optimization level and -fno-inline, not actually whether the backend has
// inlining enabled.
//
// FIXME: This is affected by other options (-fno-inline).
Opts.NoInlineDefine = !Opt;
// This is needed to allocate the extra space for the owning module
// on each decl.
Opts.ModulesLocalVisibility = 1;
}
TypeSystemClang::TypeSystemClang(llvm::StringRef name,
llvm::Triple target_triple) {
m_display_name = name.str();
if (!target_triple.str().empty())
SetTargetTriple(target_triple.str());
// The caller didn't pass an ASTContext so create a new one for this
// TypeSystemClang.
CreateASTContext();
}
TypeSystemClang::TypeSystemClang(llvm::StringRef name,
ASTContext &existing_ctxt) {
m_display_name = name.str();
SetTargetTriple(existing_ctxt.getTargetInfo().getTriple().str());
m_ast_up.reset(&existing_ctxt);
GetASTMap().Insert(&existing_ctxt, this);
}
// Destructor
TypeSystemClang::~TypeSystemClang() { Finalize(); }
lldb::TypeSystemSP TypeSystemClang::CreateInstance(lldb::LanguageType language,
lldb_private::Module *module,
Target *target) {
if (!TypeSystemClangSupportsLanguage(language))
return lldb::TypeSystemSP();
ArchSpec arch;
if (module)
arch = module->GetArchitecture();
else if (target)
arch = target->GetArchitecture();
if (!arch.IsValid())
return lldb::TypeSystemSP();
llvm::Triple triple = arch.GetTriple();
// LLVM wants this to be set to iOS or MacOSX; if we're working on
// a bare-boards type image, change the triple for llvm's benefit.
if (triple.getVendor() == llvm::Triple::Apple &&
triple.getOS() == llvm::Triple::UnknownOS) {
if (triple.getArch() == llvm::Triple::arm ||
triple.getArch() == llvm::Triple::aarch64 ||
triple.getArch() == llvm::Triple::aarch64_32 ||
triple.getArch() == llvm::Triple::thumb) {
triple.setOS(llvm::Triple::IOS);
} else {
triple.setOS(llvm::Triple::MacOSX);
}
}
if (module) {
std::string ast_name =
"ASTContext for '" + module->GetFileSpec().GetPath() + "'";
return std::make_shared<TypeSystemClang>(ast_name, triple);
} else if (target && target->IsValid())
return std::make_shared<ScratchTypeSystemClang>(*target, triple);
return lldb::TypeSystemSP();
}
LanguageSet TypeSystemClang::GetSupportedLanguagesForTypes() {
LanguageSet languages;
languages.Insert(lldb::eLanguageTypeC89);
languages.Insert(lldb::eLanguageTypeC);
languages.Insert(lldb::eLanguageTypeC11);
languages.Insert(lldb::eLanguageTypeC_plus_plus);
languages.Insert(lldb::eLanguageTypeC99);
languages.Insert(lldb::eLanguageTypeObjC);
languages.Insert(lldb::eLanguageTypeObjC_plus_plus);
languages.Insert(lldb::eLanguageTypeC_plus_plus_03);
languages.Insert(lldb::eLanguageTypeC_plus_plus_11);
languages.Insert(lldb::eLanguageTypeC11);
languages.Insert(lldb::eLanguageTypeC_plus_plus_14);
return languages;
}
LanguageSet TypeSystemClang::GetSupportedLanguagesForExpressions() {
LanguageSet languages;
languages.Insert(lldb::eLanguageTypeC_plus_plus);
languages.Insert(lldb::eLanguageTypeObjC_plus_plus);
languages.Insert(lldb::eLanguageTypeC_plus_plus_03);
languages.Insert(lldb::eLanguageTypeC_plus_plus_11);
languages.Insert(lldb::eLanguageTypeC_plus_plus_14);
return languages;
}
void TypeSystemClang::Initialize() {
PluginManager::RegisterPlugin(
GetPluginNameStatic(), "clang base AST context plug-in", CreateInstance,
GetSupportedLanguagesForTypes(), GetSupportedLanguagesForExpressions());
}
void TypeSystemClang::Terminate() {
PluginManager::UnregisterPlugin(CreateInstance);
}
void TypeSystemClang::Finalize() {
assert(m_ast_up);
GetASTMap().Erase(m_ast_up.get());
if (!m_ast_owned)
m_ast_up.release();
m_builtins_up.reset();
m_selector_table_up.reset();
m_identifier_table_up.reset();
m_target_info_up.reset();
m_target_options_rp.reset();
m_diagnostics_engine_up.reset();
m_source_manager_up.reset();
m_language_options_up.reset();
}
void TypeSystemClang::setSema(Sema *s) {
// Ensure that the new sema actually belongs to our ASTContext.
assert(s == nullptr || &s->getASTContext() == m_ast_up.get());
m_sema = s;
}
const char *TypeSystemClang::GetTargetTriple() {
return m_target_triple.c_str();
}
void TypeSystemClang::SetTargetTriple(llvm::StringRef target_triple) {
m_target_triple = target_triple.str();
}
void TypeSystemClang::SetExternalSource(
llvm::IntrusiveRefCntPtr<ExternalASTSource> &ast_source_up) {
ASTContext &ast = getASTContext();
ast.getTranslationUnitDecl()->setHasExternalLexicalStorage(true);
ast.setExternalSource(ast_source_up);
}
ASTContext &TypeSystemClang::getASTContext() {
assert(m_ast_up);
return *m_ast_up;
}
class NullDiagnosticConsumer : public DiagnosticConsumer {
public:
NullDiagnosticConsumer() { m_log = GetLog(LLDBLog::Expressions); }
void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
const clang::Diagnostic &info) override {
if (m_log) {
llvm::SmallVector<char, 32> diag_str(10);
info.FormatDiagnostic(diag_str);
diag_str.push_back('\0');
LLDB_LOGF(m_log, "Compiler diagnostic: %s\n", diag_str.data());
}
}
DiagnosticConsumer *clone(DiagnosticsEngine &Diags) const {
return new NullDiagnosticConsumer();
}
private:
Log *m_log;
};
void TypeSystemClang::CreateASTContext() {
assert(!m_ast_up);
m_ast_owned = true;
m_language_options_up = std::make_unique<LangOptions>();
ParseLangArgs(*m_language_options_up, clang::Language::ObjCXX,
GetTargetTriple());
m_identifier_table_up =
std::make_unique<IdentifierTable>(*m_language_options_up, nullptr);
m_builtins_up = std::make_unique<Builtin::Context>();
m_selector_table_up = std::make_unique<SelectorTable>();
clang::FileSystemOptions file_system_options;
m_file_manager_up = std::make_unique<clang::FileManager>(
file_system_options, FileSystem::Instance().GetVirtualFileSystem());
llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_id_sp(new DiagnosticIDs());
m_diagnostics_engine_up =
std::make_unique<DiagnosticsEngine>(diag_id_sp, new DiagnosticOptions());
m_source_manager_up = std::make_unique<clang::SourceManager>(
*m_diagnostics_engine_up, *m_file_manager_up);
m_ast_up = std::make_unique<ASTContext>(
*m_language_options_up, *m_source_manager_up, *m_identifier_table_up,
*m_selector_table_up, *m_builtins_up, TU_Complete);
m_diagnostic_consumer_up = std::make_unique<NullDiagnosticConsumer>();
m_ast_up->getDiagnostics().setClient(m_diagnostic_consumer_up.get(), false);
// This can be NULL if we don't know anything about the architecture or if
// the target for an architecture isn't enabled in the llvm/clang that we
// built
TargetInfo *target_info = getTargetInfo();
if (target_info)
m_ast_up->InitBuiltinTypes(*target_info);
GetASTMap().Insert(m_ast_up.get(), this);
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> ast_source_up(
new ClangExternalASTSourceCallbacks(*this));
SetExternalSource(ast_source_up);
}
TypeSystemClang *TypeSystemClang::GetASTContext(clang::ASTContext *ast) {
TypeSystemClang *clang_ast = GetASTMap().Lookup(ast);
return clang_ast;
}
clang::MangleContext *TypeSystemClang::getMangleContext() {
if (m_mangle_ctx_up == nullptr)
m_mangle_ctx_up.reset(getASTContext().createMangleContext());
return m_mangle_ctx_up.get();
}
std::shared_ptr<clang::TargetOptions> &TypeSystemClang::getTargetOptions() {
if (m_target_options_rp == nullptr && !m_target_triple.empty()) {
m_target_options_rp = std::make_shared<clang::TargetOptions>();
m_target_options_rp->Triple = m_target_triple;
}
return m_target_options_rp;
}
TargetInfo *TypeSystemClang::getTargetInfo() {
// target_triple should be something like "x86_64-apple-macosx"
if (m_target_info_up == nullptr && !m_target_triple.empty())
m_target_info_up.reset(TargetInfo::CreateTargetInfo(
getASTContext().getDiagnostics(), getTargetOptions()));
return m_target_info_up.get();
}
#pragma mark Basic Types
static inline bool QualTypeMatchesBitSize(const uint64_t bit_size,
ASTContext &ast, QualType qual_type) {
uint64_t qual_type_bit_size = ast.getTypeSize(qual_type);
return qual_type_bit_size == bit_size;
}
CompilerType
TypeSystemClang::GetBuiltinTypeForEncodingAndBitSize(Encoding encoding,
size_t bit_size) {
ASTContext &ast = getASTContext();
switch (encoding) {
case eEncodingInvalid:
if (QualTypeMatchesBitSize(bit_size, ast, ast.VoidPtrTy))
return GetType(ast.VoidPtrTy);
break;
case eEncodingUint:
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
break;
case eEncodingSint:
if (QualTypeMatchesBitSize(bit_size, ast, ast.SignedCharTy))
return GetType(ast.SignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.ShortTy))
return GetType(ast.ShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.IntTy))
return GetType(ast.IntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongTy))
return GetType(ast.LongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongLongTy))
return GetType(ast.LongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.Int128Ty))
return GetType(ast.Int128Ty);
break;
case eEncodingIEEE754:
if (QualTypeMatchesBitSize(bit_size, ast, ast.FloatTy))
return GetType(ast.FloatTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.DoubleTy))
return GetType(ast.DoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
return GetType(ast.LongDoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.HalfTy))
return GetType(ast.HalfTy);
break;
case eEncodingVector:
// Sanity check that bit_size is a multiple of 8.
if (bit_size && !(bit_size & 0x7u))
return GetType(ast.getExtVectorType(ast.UnsignedCharTy, bit_size / 8));
break;
}
return CompilerType();
}
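// For illustration: on a typical target where int is 32 bits wide,
// GetBuiltinTypeForEncodingAndBitSize(eEncodingSint, 32) tries the signed
// candidates in order (signed char, short, int, ...) and returns the first
// one whose bit size matches, i.e. the CompilerType for plain int.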
lldb::BasicType
TypeSystemClang::GetBasicTypeEnumeration(ConstString name) {
if (name) {
typedef UniqueCStringMap<lldb::BasicType> TypeNameToBasicTypeMap;
static TypeNameToBasicTypeMap g_type_map;
static llvm::once_flag g_once_flag;
llvm::call_once(g_once_flag, []() {
// "void"
g_type_map.Append(ConstString("void"), eBasicTypeVoid);
// "char"
g_type_map.Append(ConstString("char"), eBasicTypeChar);
g_type_map.Append(ConstString("signed char"), eBasicTypeSignedChar);
g_type_map.Append(ConstString("unsigned char"), eBasicTypeUnsignedChar);
g_type_map.Append(ConstString("wchar_t"), eBasicTypeWChar);
g_type_map.Append(ConstString("signed wchar_t"), eBasicTypeSignedWChar);
g_type_map.Append(ConstString("unsigned wchar_t"),
eBasicTypeUnsignedWChar);
// "short"
g_type_map.Append(ConstString("short"), eBasicTypeShort);
g_type_map.Append(ConstString("short int"), eBasicTypeShort);
g_type_map.Append(ConstString("unsigned short"), eBasicTypeUnsignedShort);
g_type_map.Append(ConstString("unsigned short int"),
eBasicTypeUnsignedShort);
// "int"
g_type_map.Append(ConstString("int"), eBasicTypeInt);
g_type_map.Append(ConstString("signed int"), eBasicTypeInt);
g_type_map.Append(ConstString("unsigned int"), eBasicTypeUnsignedInt);
g_type_map.Append(ConstString("unsigned"), eBasicTypeUnsignedInt);
// "long"
g_type_map.Append(ConstString("long"), eBasicTypeLong);
g_type_map.Append(ConstString("long int"), eBasicTypeLong);
g_type_map.Append(ConstString("unsigned long"), eBasicTypeUnsignedLong);
g_type_map.Append(ConstString("unsigned long int"),
eBasicTypeUnsignedLong);
// "long long"
g_type_map.Append(ConstString("long long"), eBasicTypeLongLong);
g_type_map.Append(ConstString("long long int"), eBasicTypeLongLong);
g_type_map.Append(ConstString("unsigned long long"),
eBasicTypeUnsignedLongLong);
g_type_map.Append(ConstString("unsigned long long int"),
eBasicTypeUnsignedLongLong);
// "int128"
g_type_map.Append(ConstString("__int128_t"), eBasicTypeInt128);
g_type_map.Append(ConstString("__uint128_t"), eBasicTypeUnsignedInt128);
// Miscellaneous
g_type_map.Append(ConstString("bool"), eBasicTypeBool);
g_type_map.Append(ConstString("float"), eBasicTypeFloat);
g_type_map.Append(ConstString("double"), eBasicTypeDouble);
g_type_map.Append(ConstString("long double"), eBasicTypeLongDouble);
g_type_map.Append(ConstString("id"), eBasicTypeObjCID);
g_type_map.Append(ConstString("SEL"), eBasicTypeObjCSel);
g_type_map.Append(ConstString("nullptr"), eBasicTypeNullPtr);
g_type_map.Sort();
});
return g_type_map.Find(name, eBasicTypeInvalid);
}
return eBasicTypeInvalid;
}
uint32_t TypeSystemClang::GetPointerByteSize() {
if (m_pointer_byte_size == 0)
if (auto size = GetBasicType(lldb::eBasicTypeVoid)
.GetPointerType()
.GetByteSize(nullptr))
m_pointer_byte_size = *size;
return m_pointer_byte_size;
}
CompilerType TypeSystemClang::GetBasicType(lldb::BasicType basic_type) {
clang::ASTContext &ast = getASTContext();
lldb::opaque_compiler_type_t clang_type =
GetOpaqueCompilerType(&ast, basic_type);
if (clang_type)
return CompilerType(this, clang_type);
return CompilerType();
}
CompilerType TypeSystemClang::GetBuiltinTypeForDWARFEncodingAndBitSize(
llvm::StringRef type_name, uint32_t dw_ate, uint32_t bit_size) {
ASTContext &ast = getASTContext();
switch (dw_ate) {
default:
break;
case DW_ATE_address:
if (QualTypeMatchesBitSize(bit_size, ast, ast.VoidPtrTy))
return GetType(ast.VoidPtrTy);
break;
case DW_ATE_boolean:
if (QualTypeMatchesBitSize(bit_size, ast, ast.BoolTy))
return GetType(ast.BoolTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
break;
case DW_ATE_lo_user:
// This has been seen to mean DW_AT_complex_integer
if (type_name.contains("complex")) {
CompilerType complex_int_clang_type =
GetBuiltinTypeForDWARFEncodingAndBitSize("int", DW_ATE_signed,
bit_size / 2);
return GetType(
ast.getComplexType(ClangUtil::GetQualType(complex_int_clang_type)));
}
break;
case DW_ATE_complex_float: {
CanQualType FloatComplexTy = ast.getComplexType(ast.FloatTy);
if (QualTypeMatchesBitSize(bit_size, ast, FloatComplexTy))
return GetType(FloatComplexTy);
CanQualType DoubleComplexTy = ast.getComplexType(ast.DoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, DoubleComplexTy))
return GetType(DoubleComplexTy);
CanQualType LongDoubleComplexTy = ast.getComplexType(ast.LongDoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, LongDoubleComplexTy))
return GetType(LongDoubleComplexTy);
CompilerType complex_float_clang_type =
GetBuiltinTypeForDWARFEncodingAndBitSize("float", DW_ATE_float,
bit_size / 2);
return GetType(
ast.getComplexType(ClangUtil::GetQualType(complex_float_clang_type)));
}
case DW_ATE_float:
if (type_name == "float" &&
QualTypeMatchesBitSize(bit_size, ast, ast.FloatTy))
return GetType(ast.FloatTy);
if (type_name == "double" &&
QualTypeMatchesBitSize(bit_size, ast, ast.DoubleTy))
return GetType(ast.DoubleTy);
if (type_name == "long double" &&
QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
return GetType(ast.LongDoubleTy);
// Fall back to not requiring a name match
if (QualTypeMatchesBitSize(bit_size, ast, ast.FloatTy))
return GetType(ast.FloatTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.DoubleTy))
return GetType(ast.DoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongDoubleTy))
return GetType(ast.LongDoubleTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.HalfTy))
return GetType(ast.HalfTy);
break;
case DW_ATE_signed:
if (!type_name.empty()) {
if (type_name == "wchar_t" &&
QualTypeMatchesBitSize(bit_size, ast, ast.WCharTy) &&
(getTargetInfo() &&
TargetInfo::isTypeSigned(getTargetInfo()->getWCharType())))
return GetType(ast.WCharTy);
if (type_name == "void" &&
QualTypeMatchesBitSize(bit_size, ast, ast.VoidTy))
return GetType(ast.VoidTy);
if (type_name.contains("long long") &&
QualTypeMatchesBitSize(bit_size, ast, ast.LongLongTy))
return GetType(ast.LongLongTy);
if (type_name.contains("long") &&
QualTypeMatchesBitSize(bit_size, ast, ast.LongTy))
return GetType(ast.LongTy);
if (type_name.contains("short") &&
QualTypeMatchesBitSize(bit_size, ast, ast.ShortTy))
return GetType(ast.ShortTy);
if (type_name.contains("char")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.SignedCharTy))
return GetType(ast.SignedCharTy);
}
if (type_name.contains("int")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.IntTy))
return GetType(ast.IntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.Int128Ty))
return GetType(ast.Int128Ty);
}
}
// We weren't able to match up a type name, just search by size
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.ShortTy))
return GetType(ast.ShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.IntTy))
return GetType(ast.IntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongTy))
return GetType(ast.LongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.LongLongTy))
return GetType(ast.LongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.Int128Ty))
return GetType(ast.Int128Ty);
break;
case DW_ATE_signed_char:
if (ast.getLangOpts().CharIsSigned && type_name == "char") {
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
}
if (QualTypeMatchesBitSize(bit_size, ast, ast.SignedCharTy))
return GetType(ast.SignedCharTy);
break;
case DW_ATE_unsigned:
if (!type_name.empty()) {
if (type_name == "wchar_t") {
if (QualTypeMatchesBitSize(bit_size, ast, ast.WCharTy)) {
if (!(getTargetInfo() &&
TargetInfo::isTypeSigned(getTargetInfo()->getWCharType())))
return GetType(ast.WCharTy);
}
}
if (type_name.contains("long long")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
} else if (type_name.contains("long")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
} else if (type_name.contains("short")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
} else if (type_name.contains("char")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
} else if (type_name.contains("int")) {
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
}
}
// We weren't able to match up a type name, just search by size
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
break;
case DW_ATE_unsigned_char:
if (!ast.getLangOpts().CharIsSigned && type_name == "char") {
if (QualTypeMatchesBitSize(bit_size, ast, ast.CharTy))
return GetType(ast.CharTy);
}
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (QualTypeMatchesBitSize(bit_size, ast, ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
break;
case DW_ATE_imaginary_float:
break;
case DW_ATE_UTF:
switch (bit_size) {
case 8:
return GetType(ast.Char8Ty);
case 16:
return GetType(ast.Char16Ty);
case 32:
return GetType(ast.Char32Ty);
default:
if (!type_name.empty()) {
if (type_name == "char16_t")
return GetType(ast.Char16Ty);
if (type_name == "char32_t")
return GetType(ast.Char32Ty);
if (type_name == "char8_t")
return GetType(ast.Char8Ty);
}
}
break;
}
Log *log = GetLog(LLDBLog::Types);
LLDB_LOG(log,
"error: need to add support for DW_TAG_base_type '{0}' "
"encoded with DW_ATE = {1:x}, bit_size = {2}",
type_name, dw_ate, bit_size);
return CompilerType();
}
CompilerType TypeSystemClang::GetCStringType(bool is_const) {
ASTContext &ast = getASTContext();
QualType char_type(ast.CharTy);
if (is_const)
char_type.addConst();
return GetType(ast.getPointerType(char_type));
}
bool TypeSystemClang::AreTypesSame(CompilerType type1, CompilerType type2,
bool ignore_qualifiers) {
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(type1.GetTypeSystem());
if (!ast || ast != type2.GetTypeSystem())
return false;
if (type1.GetOpaqueQualType() == type2.GetOpaqueQualType())
return true;
QualType type1_qual = ClangUtil::GetQualType(type1);
QualType type2_qual = ClangUtil::GetQualType(type2);
if (ignore_qualifiers) {
type1_qual = type1_qual.getUnqualifiedType();
type2_qual = type2_qual.getUnqualifiedType();
}
return ast->getASTContext().hasSameType(type1_qual, type2_qual);
}
CompilerType TypeSystemClang::GetTypeForDecl(void *opaque_decl) {
if (!opaque_decl)
return CompilerType();
clang::Decl *decl = static_cast<clang::Decl *>(opaque_decl);
if (auto *named_decl = llvm::dyn_cast<clang::NamedDecl>(decl))
return GetTypeForDecl(named_decl);
return CompilerType();
}
CompilerDeclContext TypeSystemClang::CreateDeclContext(DeclContext *ctx) {
// Check that the DeclContext actually belongs to this ASTContext.
assert(&ctx->getParentASTContext() == &getASTContext());
return CompilerDeclContext(this, ctx);
}
CompilerType TypeSystemClang::GetTypeForDecl(clang::NamedDecl *decl) {
if (clang::ObjCInterfaceDecl *interface_decl =
llvm::dyn_cast<clang::ObjCInterfaceDecl>(decl))
return GetTypeForDecl(interface_decl);
if (clang::TagDecl *tag_decl = llvm::dyn_cast<clang::TagDecl>(decl))
return GetTypeForDecl(tag_decl);
return CompilerType();
}
CompilerType TypeSystemClang::GetTypeForDecl(TagDecl *decl) {
return GetType(getASTContext().getTagDeclType(decl));
}
CompilerType TypeSystemClang::GetTypeForDecl(ObjCInterfaceDecl *decl) {
return GetType(getASTContext().getObjCInterfaceType(decl));
}
#pragma mark Structure, Unions, Classes
void TypeSystemClang::SetOwningModule(clang::Decl *decl,
OptionalClangModuleID owning_module) {
if (!decl || !owning_module.HasValue())
return;
decl->setFromASTFile();
decl->setOwningModuleID(owning_module.GetValue());
decl->setModuleOwnershipKind(clang::Decl::ModuleOwnershipKind::Visible);
}
OptionalClangModuleID
TypeSystemClang::GetOrCreateClangModule(llvm::StringRef name,
OptionalClangModuleID parent,
bool is_framework, bool is_explicit) {
// Get the external AST source which holds the modules.
auto *ast_source = llvm::dyn_cast_or_null<ClangExternalASTSourceCallbacks>(
getASTContext().getExternalSource());
assert(ast_source && "external ast source was lost");
if (!ast_source)
return {};
// Lazily initialize the module map.
if (!m_header_search_up) {
auto HSOpts = std::make_shared<clang::HeaderSearchOptions>();
m_header_search_up = std::make_unique<clang::HeaderSearch>(
HSOpts, *m_source_manager_up, *m_diagnostics_engine_up,
*m_language_options_up, m_target_info_up.get());
m_module_map_up = std::make_unique<clang::ModuleMap>(
*m_source_manager_up, *m_diagnostics_engine_up, *m_language_options_up,
m_target_info_up.get(), *m_header_search_up);
}
// Get or create the module context.
bool created;
clang::Module *module;
auto parent_desc = ast_source->getSourceDescriptor(parent.GetValue());
std::tie(module, created) = m_module_map_up->findOrCreateModule(
name, parent_desc ? parent_desc->getModuleOrNull() : nullptr,
is_framework, is_explicit);
if (!created)
return ast_source->GetIDForModule(module);
return ast_source->RegisterModule(module);
}
CompilerType TypeSystemClang::CreateRecordType(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
AccessType access_type, llvm::StringRef name, int kind,
LanguageType language, ClangASTMetadata *metadata, bool exports_symbols) {
ASTContext &ast = getASTContext();
if (decl_ctx == nullptr)
decl_ctx = ast.getTranslationUnitDecl();
if (language == eLanguageTypeObjC ||
language == eLanguageTypeObjC_plus_plus) {
bool isForwardDecl = true;
bool isInternal = false;
return CreateObjCClass(name, decl_ctx, owning_module, isForwardDecl,
isInternal, metadata);
}
// NOTE: Eventually CXXRecordDecl will be merged back into RecordDecl and
// we will need to update this code. I was told to currently always use the
// CXXRecordDecl class since we often don't know from debug information if
// something is a struct or a class, so we default to always using the more
// complete definition just in case.
bool has_name = !name.empty();
CXXRecordDecl *decl = CXXRecordDecl::CreateDeserialized(ast, 0);
decl->setTagKind(static_cast<TagDecl::TagKind>(kind));
decl->setDeclContext(decl_ctx);
if (has_name)
decl->setDeclName(&ast.Idents.get(name));
SetOwningModule(decl, owning_module);
if (!has_name) {
// In C++ a lambda is also represented as an unnamed class. This is
// different from an *anonymous class* that the user wrote:
//
// struct A {
// // anonymous class (GNU/MSVC extension)
// struct {
// int x;
// };
// // unnamed class within a class
// struct {
// int y;
// } B;
// };
//
// void f() {
// unnamed class outside of a class
// struct {
// int z;
// } C;
// }
//
// Anonymous classes are a GNU/MSVC extension that clang supports. The
// extension requires the anonymous class to be embedded within a class,
// so the heuristic below verifies this condition.
if (isa<CXXRecordDecl>(decl_ctx) && exports_symbols)
decl->setAnonymousStructOrUnion(true);
}
if (metadata)
SetMetadata(decl, *metadata);
if (access_type != eAccessNone)
decl->setAccess(ConvertAccessTypeToAccessSpecifier(access_type));
if (decl_ctx)
decl_ctx->addDecl(decl);
return GetType(ast.getTagDeclType(decl));
}
namespace {
/// Returns true iff the given TemplateArgument should be represented as an
/// NonTypeTemplateParmDecl in the AST.
bool IsValueParam(const clang::TemplateArgument &argument) {
return argument.getKind() == TemplateArgument::Integral;
}
void AddAccessSpecifierDecl(clang::CXXRecordDecl *cxx_record_decl,
ASTContext &ct,
clang::AccessSpecifier previous_access,
clang::AccessSpecifier access_specifier) {
if (!cxx_record_decl->isClass() && !cxx_record_decl->isStruct())
return;
if (previous_access != access_specifier) {
// For struct, don't add AS_public if it's the first AccessSpecDecl.
// For class, don't add AS_private if it's the first AccessSpecDecl.
if ((cxx_record_decl->isStruct() &&
previous_access == clang::AccessSpecifier::AS_none &&
access_specifier == clang::AccessSpecifier::AS_public) ||
(cxx_record_decl->isClass() &&
previous_access == clang::AccessSpecifier::AS_none &&
access_specifier == clang::AccessSpecifier::AS_private)) {
return;
}
cxx_record_decl->addDecl(
AccessSpecDecl::Create(ct, access_specifier, cxx_record_decl,
SourceLocation(), SourceLocation()));
}
}
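// For illustration: in "struct S { public: int a; private: int b; };" the
// leading "public:" matches the struct default (previous_access is AS_none),
// so no AccessSpecDecl is added for it; the subsequent "private:" differs
// from AS_public and therefore does get an AccessSpecDecl.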
} // namespace
static TemplateParameterList *CreateTemplateParameterList(
ASTContext &ast,
const TypeSystemClang::TemplateParameterInfos &template_param_infos,
llvm::SmallVector<NamedDecl *, 8> &template_param_decls) {
const bool parameter_pack = false;
const bool is_typename = false;
const unsigned depth = 0;
const size_t num_template_params = template_param_infos.args.size();
DeclContext *const decl_context =
ast.getTranslationUnitDecl(); // Is this the right decl context?
for (size_t i = 0; i < num_template_params; ++i) {
const char *name = template_param_infos.names[i];
IdentifierInfo *identifier_info = nullptr;
if (name && name[0])
identifier_info = &ast.Idents.get(name);
if (IsValueParam(template_param_infos.args[i])) {
QualType template_param_type =
template_param_infos.args[i].getIntegralType();
template_param_decls.push_back(NonTypeTemplateParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth, i,
identifier_info, template_param_type, parameter_pack,
ast.getTrivialTypeSourceInfo(template_param_type)));
} else {
template_param_decls.push_back(TemplateTypeParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth, i,
identifier_info, is_typename, parameter_pack));
}
}
if (template_param_infos.packed_args) {
IdentifierInfo *identifier_info = nullptr;
if (template_param_infos.pack_name && template_param_infos.pack_name[0])
identifier_info = &ast.Idents.get(template_param_infos.pack_name);
const bool parameter_pack_true = true;
if (!template_param_infos.packed_args->args.empty() &&
IsValueParam(template_param_infos.packed_args->args[0])) {
QualType template_param_type =
template_param_infos.packed_args->args[0].getIntegralType();
template_param_decls.push_back(NonTypeTemplateParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth,
num_template_params, identifier_info, template_param_type,
parameter_pack_true,
ast.getTrivialTypeSourceInfo(template_param_type)));
} else {
template_param_decls.push_back(TemplateTypeParmDecl::Create(
ast, decl_context, SourceLocation(), SourceLocation(), depth,
num_template_params, identifier_info, is_typename,
parameter_pack_true));
}
}
clang::Expr *const requires_clause = nullptr; // TODO: Concepts
TemplateParameterList *template_param_list = TemplateParameterList::Create(
ast, SourceLocation(), SourceLocation(), template_param_decls,
SourceLocation(), requires_clause);
return template_param_list;
}
clang::FunctionTemplateDecl *TypeSystemClang::CreateFunctionTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::FunctionDecl *func_decl,
const TemplateParameterInfos &template_param_infos) {
// Create a function template node.
ASTContext &ast = getASTContext();
llvm::SmallVector<NamedDecl *, 8> template_param_decls;
TemplateParameterList *template_param_list = CreateTemplateParameterList(
ast, template_param_infos, template_param_decls);
FunctionTemplateDecl *func_tmpl_decl =
FunctionTemplateDecl::CreateDeserialized(ast, 0);
func_tmpl_decl->setDeclContext(decl_ctx);
func_tmpl_decl->setLocation(func_decl->getLocation());
func_tmpl_decl->setDeclName(func_decl->getDeclName());
func_tmpl_decl->init(func_decl, template_param_list);
SetOwningModule(func_tmpl_decl, owning_module);
for (size_t i = 0, template_param_decl_count = template_param_decls.size();
i < template_param_decl_count; ++i) {
// TODO: verify which decl context we should put template_param_decls into.
template_param_decls[i]->setDeclContext(func_decl);
}
// Function templates inside a record need to have an access specifier.
// It doesn't matter what access specifier we give the template, as LLDB
// allows accessing everything inside a record anyway.
if (decl_ctx->isRecord())
func_tmpl_decl->setAccess(clang::AccessSpecifier::AS_public);
return func_tmpl_decl;
}
void TypeSystemClang::CreateFunctionTemplateSpecializationInfo(
FunctionDecl *func_decl, clang::FunctionTemplateDecl *func_tmpl_decl,
const TemplateParameterInfos &infos) {
TemplateArgumentList *template_args_ptr =
TemplateArgumentList::CreateCopy(func_decl->getASTContext(), infos.args);
func_decl->setFunctionTemplateSpecialization(func_tmpl_decl,
template_args_ptr, nullptr);
}
/// Returns true if the given template parameter can represent the given value.
/// For example, `typename T` can represent `int` but not integral values such
/// as `int I = 3`.
static bool TemplateParameterAllowsValue(NamedDecl *param,
const TemplateArgument &value) {
if (llvm::isa<TemplateTypeParmDecl>(param)) {
// Compare the argument kind, i.e. ensure that <typename> != <int>.
if (value.getKind() != TemplateArgument::Type)
return false;
} else if (auto *type_param =
llvm::dyn_cast<NonTypeTemplateParmDecl>(param)) {
// Compare the argument kind, i.e. ensure that <typename> != <int>.
if (!IsValueParam(value))
return false;
// Compare the integral type, i.e. ensure that <int> != <char>.
if (type_param->getType() != value.getIntegralType())
return false;
} else {
// There is no way to create other parameter decls at the moment, so we
// can't reach this case during normal LLDB usage. Log that this happened
// and assert.
Log *log = GetLog(LLDBLog::Expressions);
LLDB_LOG(log,
"Don't know how to compare template parameter to passed"
" value. Decl kind of parameter is: {0}",
param->getDeclKindName());
lldbassert(false && "Can't compare this TemplateParmDecl subclass");
// In release builds just fall back to marking the parameter as not
// accepting the value so that we don't try to fit an instantiation to a
// template that doesn't fit. E.g., avoid that `S<1>` is being connected to
// `template<typename T> struct S;`.
return false;
}
return true;
}
/// Returns true if the given class template declaration could produce an
/// instantiation with the specified values.
/// For example, `<typename T>` allows the arguments `float`, but not for
/// example `bool, float` or `3` (as an integer parameter value).
static bool ClassTemplateAllowsToInstantiationArgs(
ClassTemplateDecl *class_template_decl,
const TypeSystemClang::TemplateParameterInfos &instantiation_values) {
TemplateParameterList &params = *class_template_decl->getTemplateParameters();
// Save some work by iterating only once over the found parameters and
// calculating the information related to parameter packs.
// Contains the first pack parameter (or none if there are none).
llvm::Optional<NamedDecl *> pack_parameter;
// Contains the number of non-pack parameters.
size_t non_pack_params = params.size();
for (size_t i = 0; i < params.size(); ++i) {
NamedDecl *param = params.getParam(i);
if (param->isParameterPack()) {
pack_parameter = param;
non_pack_params = i;
break;
}
}
// The found template needs to have compatible non-pack template arguments.
// E.g., ensure that <typename, typename> != <typename>.
// The pack parameters are compared later.
if (non_pack_params != instantiation_values.args.size())
return false;
// Ensure that <typename...> != <typename>.
if (pack_parameter.has_value() != instantiation_values.hasParameterPack())
return false;
// Compare the first pack parameter that was found with the first pack
// parameter value. The special case of having an empty parameter pack value
// always fits a pack parameter.
// E.g., ensure that <int...> != <typename...>.
if (pack_parameter && !instantiation_values.packed_args->args.empty() &&
!TemplateParameterAllowsValue(
*pack_parameter, instantiation_values.packed_args->args.front()))
return false;
// Compare all the non-pack parameters now.
// E.g., ensure that <int> != <long>.
for (const auto pair : llvm::zip_first(instantiation_values.args, params)) {
const TemplateArgument &passed_arg = std::get<0>(pair);
NamedDecl *found_param = std::get<1>(pair);
if (!TemplateParameterAllowsValue(found_param, passed_arg))
return false;
}
return true;
}
ClassTemplateDecl *TypeSystemClang::CreateClassTemplateDecl(
DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, llvm::StringRef class_name, int kind,
const TemplateParameterInfos &template_param_infos) {
ASTContext &ast = getASTContext();
ClassTemplateDecl *class_template_decl = nullptr;
if (decl_ctx == nullptr)
decl_ctx = ast.getTranslationUnitDecl();
IdentifierInfo &identifier_info = ast.Idents.get(class_name);
DeclarationName decl_name(&identifier_info);
// Search the AST for an existing ClassTemplateDecl that could be reused.
clang::DeclContext::lookup_result result = decl_ctx->lookup(decl_name);
for (NamedDecl *decl : result) {
class_template_decl = dyn_cast<clang::ClassTemplateDecl>(decl);
if (!class_template_decl)
continue;
// The class template has to be able to represent the instantiation
// values we received. Without this we might end up putting an instantiation
// with arguments such as <int, int> into a template such as:
// template<typename T> struct S;
// Connecting the instantiation to an incompatible template could cause
// problems later on.
if (!ClassTemplateAllowsToInstantiationArgs(class_template_decl,
template_param_infos))
continue;
return class_template_decl;
}
llvm::SmallVector<NamedDecl *, 8> template_param_decls;
TemplateParameterList *template_param_list = CreateTemplateParameterList(
ast, template_param_infos, template_param_decls);
CXXRecordDecl *template_cxx_decl = CXXRecordDecl::CreateDeserialized(ast, 0);
template_cxx_decl->setTagKind(static_cast<TagDecl::TagKind>(kind));
// What decl context do we use here? TU? The actual decl context?
template_cxx_decl->setDeclContext(decl_ctx);
template_cxx_decl->setDeclName(decl_name);
SetOwningModule(template_cxx_decl, owning_module);
for (size_t i = 0, template_param_decl_count = template_param_decls.size();
i < template_param_decl_count; ++i) {
template_param_decls[i]->setDeclContext(template_cxx_decl);
}
// With templated classes, we say that a class is templated with
// specializations, but that the bare class has no functions.
// template_cxx_decl->startDefinition();
// template_cxx_decl->completeDefinition();
class_template_decl = ClassTemplateDecl::CreateDeserialized(ast, 0);
// What decl context do we use here? TU? The actual decl context?
class_template_decl->setDeclContext(decl_ctx);
class_template_decl->setDeclName(decl_name);
class_template_decl->init(template_cxx_decl, template_param_list);
template_cxx_decl->setDescribedClassTemplate(class_template_decl);
SetOwningModule(class_template_decl, owning_module);
if (access_type != eAccessNone)
class_template_decl->setAccess(
ConvertAccessTypeToAccessSpecifier(access_type));
decl_ctx->addDecl(class_template_decl);
VerifyDecl(class_template_decl);
return class_template_decl;
}
TemplateTemplateParmDecl *
TypeSystemClang::CreateTemplateTemplateParmDecl(const char *template_name) {
ASTContext &ast = getASTContext();
auto *decl_ctx = ast.getTranslationUnitDecl();
IdentifierInfo &identifier_info = ast.Idents.get(template_name);
llvm::SmallVector<NamedDecl *, 8> template_param_decls;
TypeSystemClang::TemplateParameterInfos template_param_infos;
TemplateParameterList *template_param_list = CreateTemplateParameterList(
ast, template_param_infos, template_param_decls);
// LLDB needs to create those decls only to be able to display a
// type that includes a template template argument. Only the name matters for
// this purpose, so we use dummy values for the other characteristics of the
// type.
return TemplateTemplateParmDecl::Create(
ast, decl_ctx, SourceLocation(),
/*Depth*/ 0, /*Position*/ 0,
/*IsParameterPack*/ false, &identifier_info, template_param_list);
}
ClassTemplateSpecializationDecl *
TypeSystemClang::CreateClassTemplateSpecializationDecl(
DeclContext *decl_ctx, OptionalClangModuleID owning_module,
ClassTemplateDecl *class_template_decl, int kind,
const TemplateParameterInfos &template_param_infos) {
ASTContext &ast = getASTContext();
llvm::SmallVector<clang::TemplateArgument, 2> args(
template_param_infos.args.size() +
(template_param_infos.packed_args ? 1 : 0));
std::copy(template_param_infos.args.begin(), template_param_infos.args.end(),
args.begin());
if (template_param_infos.packed_args) {
args[args.size() - 1] = TemplateArgument::CreatePackCopy(
ast, template_param_infos.packed_args->args);
}
ClassTemplateSpecializationDecl *class_template_specialization_decl =
ClassTemplateSpecializationDecl::CreateDeserialized(ast, 0);
class_template_specialization_decl->setTagKind(
static_cast<TagDecl::TagKind>(kind));
class_template_specialization_decl->setDeclContext(decl_ctx);
class_template_specialization_decl->setInstantiationOf(class_template_decl);
class_template_specialization_decl->setTemplateArgs(
TemplateArgumentList::CreateCopy(ast, args));
ast.getTypeDeclType(class_template_specialization_decl, nullptr);
class_template_specialization_decl->setDeclName(
class_template_decl->getDeclName());
SetOwningModule(class_template_specialization_decl, owning_module);
decl_ctx->addDecl(class_template_specialization_decl);
class_template_specialization_decl->setSpecializationKind(
TSK_ExplicitSpecialization);
return class_template_specialization_decl;
}
CompilerType TypeSystemClang::CreateClassTemplateSpecializationType(
ClassTemplateSpecializationDecl *class_template_specialization_decl) {
if (class_template_specialization_decl) {
ASTContext &ast = getASTContext();
return GetType(ast.getTagDeclType(class_template_specialization_decl));
}
return CompilerType();
}
static inline bool check_op_param(bool is_method,
clang::OverloadedOperatorKind op_kind,
bool unary, bool binary,
uint32_t num_params) {
// Special-case the call operator (OO_Call) since it can take any number of
// operands
if (op_kind == OO_Call)
return true;
// The parameter count doesn't include "this"
if (is_method)
++num_params;
if (num_params == 1)
return unary;
if (num_params == 2)
return binary;
return false;
}
bool TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
bool is_method, clang::OverloadedOperatorKind op_kind,
uint32_t num_params) {
switch (op_kind) {
default:
break;
// C++ standard allows any number of arguments to new/delete
case OO_New:
case OO_Array_New:
case OO_Delete:
case OO_Array_Delete:
return true;
}
#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
case OO_##Name: \
return check_op_param(is_method, op_kind, Unary, Binary, num_params);
switch (op_kind) {
#include "clang/Basic/OperatorKinds.def"
default:
break;
}
return false;
}
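// A quick illustration of the check above (editor's sketch, not part of the
// original source; results follow directly from check_op_param, with operator
// arities taken from clang's OperatorKinds.def):
//
//   // operator+ can be unary or binary, so both of these pass:
//   TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
//       /*is_method=*/false, clang::OO_Plus, /*num_params=*/2); // true
//   TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
//       /*is_method=*/true, clang::OO_Plus, /*num_params=*/1);  // true ("this" counts)
//   // Three explicit parameters never match a unary/binary operator:
//   TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
//       /*is_method=*/false, clang::OO_Plus, /*num_params=*/3); // false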
clang::AccessSpecifier
TypeSystemClang::UnifyAccessSpecifiers(clang::AccessSpecifier lhs,
clang::AccessSpecifier rhs) {
// Make the access equal to the stricter of the field's access and the nested
// field's access.
if (lhs == AS_none || rhs == AS_none)
return AS_none;
if (lhs == AS_private || rhs == AS_private)
return AS_private;
if (lhs == AS_protected || rhs == AS_protected)
return AS_protected;
return AS_public;
}
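// Illustrative results (these follow directly from the ordering above):
//   UnifyAccessSpecifiers(AS_public, AS_private)   == AS_private
//   UnifyAccessSpecifiers(AS_protected, AS_public) == AS_protected
//   UnifyAccessSpecifiers(AS_none, AS_public)      == AS_none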
bool TypeSystemClang::FieldIsBitfield(FieldDecl *field,
uint32_t &bitfield_bit_size) {
ASTContext &ast = getASTContext();
if (field == nullptr)
return false;
if (field->isBitField()) {
Expr *bit_width_expr = field->getBitWidth();
if (bit_width_expr) {
if (Optional<llvm::APSInt> bit_width_apsint =
bit_width_expr->getIntegerConstantExpr(ast)) {
bitfield_bit_size = bit_width_apsint->getLimitedValue(UINT32_MAX);
return true;
}
}
}
return false;
}
bool TypeSystemClang::RecordHasFields(const RecordDecl *record_decl) {
if (record_decl == nullptr)
return false;
if (!record_decl->field_empty())
return true;
// No fields; let's check whether this is a CXX record and, if so, whether any
// of its base classes have fields.
const CXXRecordDecl *cxx_record_decl = dyn_cast<CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
CXXRecordDecl::base_class_const_iterator base_class, base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const CXXRecordDecl *base_class_decl = cast<CXXRecordDecl>(
base_class->getType()->getAs<RecordType>()->getDecl());
if (RecordHasFields(base_class_decl))
return true;
}
}
return false;
}
#pragma mark Objective-C Classes
CompilerType TypeSystemClang::CreateObjCClass(
llvm::StringRef name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, bool isForwardDecl, bool isInternal,
ClangASTMetadata *metadata) {
ASTContext &ast = getASTContext();
assert(!name.empty());
if (!decl_ctx)
decl_ctx = ast.getTranslationUnitDecl();
ObjCInterfaceDecl *decl = ObjCInterfaceDecl::CreateDeserialized(ast, 0);
decl->setDeclContext(decl_ctx);
decl->setDeclName(&ast.Idents.get(name));
/*isForwardDecl,*/
decl->setImplicit(isInternal);
SetOwningModule(decl, owning_module);
if (metadata)
SetMetadata(decl, *metadata);
return GetType(ast.getObjCInterfaceType(decl));
}
static inline bool BaseSpecifierIsEmpty(const CXXBaseSpecifier *b) {
return !TypeSystemClang::RecordHasFields(b->getType()->getAsCXXRecordDecl());
}
uint32_t
TypeSystemClang::GetNumBaseClasses(const CXXRecordDecl *cxx_record_decl,
bool omit_empty_base_classes) {
uint32_t num_bases = 0;
if (cxx_record_decl) {
if (omit_empty_base_classes) {
CXXRecordDecl::base_class_const_iterator base_class, base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
// Skip empty base classes
if (BaseSpecifierIsEmpty(base_class))
continue;
++num_bases;
}
} else
num_bases = cxx_record_decl->getNumBases();
}
return num_bases;
}
#pragma mark Namespace Declarations
NamespaceDecl *TypeSystemClang::GetUniqueNamespaceDeclaration(
const char *name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, bool is_inline) {
NamespaceDecl *namespace_decl = nullptr;
ASTContext &ast = getASTContext();
TranslationUnitDecl *translation_unit_decl = ast.getTranslationUnitDecl();
if (!decl_ctx)
decl_ctx = translation_unit_decl;
if (name) {
IdentifierInfo &identifier_info = ast.Idents.get(name);
DeclarationName decl_name(&identifier_info);
clang::DeclContext::lookup_result result = decl_ctx->lookup(decl_name);
for (NamedDecl *decl : result) {
namespace_decl = dyn_cast<clang::NamespaceDecl>(decl);
if (namespace_decl)
return namespace_decl;
}
namespace_decl =
NamespaceDecl::Create(ast, decl_ctx, is_inline, SourceLocation(),
SourceLocation(), &identifier_info, nullptr);
decl_ctx->addDecl(namespace_decl);
} else {
if (decl_ctx == translation_unit_decl) {
namespace_decl = translation_unit_decl->getAnonymousNamespace();
if (namespace_decl)
return namespace_decl;
namespace_decl =
NamespaceDecl::Create(ast, decl_ctx, false, SourceLocation(),
SourceLocation(), nullptr, nullptr);
translation_unit_decl->setAnonymousNamespace(namespace_decl);
translation_unit_decl->addDecl(namespace_decl);
assert(namespace_decl == translation_unit_decl->getAnonymousNamespace());
} else {
NamespaceDecl *parent_namespace_decl = cast<NamespaceDecl>(decl_ctx);
if (parent_namespace_decl) {
namespace_decl = parent_namespace_decl->getAnonymousNamespace();
if (namespace_decl)
return namespace_decl;
namespace_decl =
NamespaceDecl::Create(ast, decl_ctx, false, SourceLocation(),
SourceLocation(), nullptr, nullptr);
parent_namespace_decl->setAnonymousNamespace(namespace_decl);
parent_namespace_decl->addDecl(namespace_decl);
assert(namespace_decl ==
parent_namespace_decl->getAnonymousNamespace());
} else {
assert(false && "GetUniqueNamespaceDeclaration called with no name and "
"no namespace as decl_ctx");
}
}
}
// Note: namespaces can span multiple modules, so perhaps this isn't a good
// idea.
SetOwningModule(namespace_decl, owning_module);
VerifyDecl(namespace_decl);
return namespace_decl;
}
clang::BlockDecl *
TypeSystemClang::CreateBlockDeclaration(clang::DeclContext *ctx,
OptionalClangModuleID owning_module) {
if (ctx) {
clang::BlockDecl *decl =
clang::BlockDecl::CreateDeserialized(getASTContext(), 0);
decl->setDeclContext(ctx);
ctx->addDecl(decl);
SetOwningModule(decl, owning_module);
return decl;
}
return nullptr;
}
clang::DeclContext *FindLCABetweenDecls(clang::DeclContext *left,
clang::DeclContext *right,
clang::DeclContext *root) {
if (root == nullptr)
return nullptr;
std::set<clang::DeclContext *> path_left;
for (clang::DeclContext *d = left; d != nullptr; d = d->getParent())
path_left.insert(d);
for (clang::DeclContext *d = right; d != nullptr; d = d->getParent())
if (path_left.find(d) != path_left.end())
return d;
return nullptr;
}
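// Sketch of the walk above (decl names hypothetical): for decl contexts nested
// as TU -> A -> B and TU -> A -> C, the first loop records {B, A, TU} in
// path_left, and the upward walk from C first hits A, so:
//
//   clang::DeclContext *lca = FindLCABetweenDecls(b_ctx, c_ctx, tu); // == A
//
// Note that `root` is only used as a null check; the walk itself always runs
// to the top of the context chain.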
clang::UsingDirectiveDecl *TypeSystemClang::CreateUsingDirectiveDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::NamespaceDecl *ns_decl) {
if (decl_ctx && ns_decl) {
auto *translation_unit = getASTContext().getTranslationUnitDecl();
clang::UsingDirectiveDecl *using_decl = clang::UsingDirectiveDecl::Create(
getASTContext(), decl_ctx, clang::SourceLocation(),
clang::SourceLocation(), clang::NestedNameSpecifierLoc(),
clang::SourceLocation(), ns_decl,
FindLCABetweenDecls(decl_ctx, ns_decl,
translation_unit));
decl_ctx->addDecl(using_decl);
SetOwningModule(using_decl, owning_module);
return using_decl;
}
return nullptr;
}
clang::UsingDecl *
TypeSystemClang::CreateUsingDeclaration(clang::DeclContext *current_decl_ctx,
OptionalClangModuleID owning_module,
clang::NamedDecl *target) {
if (current_decl_ctx && target) {
clang::UsingDecl *using_decl = clang::UsingDecl::Create(
getASTContext(), current_decl_ctx, clang::SourceLocation(),
clang::NestedNameSpecifierLoc(), clang::DeclarationNameInfo(), false);
SetOwningModule(using_decl, owning_module);
clang::UsingShadowDecl *shadow_decl = clang::UsingShadowDecl::Create(
getASTContext(), current_decl_ctx, clang::SourceLocation(),
target->getDeclName(), using_decl, target);
SetOwningModule(shadow_decl, owning_module);
using_decl->addShadowDecl(shadow_decl);
current_decl_ctx->addDecl(using_decl);
return using_decl;
}
return nullptr;
}
clang::VarDecl *TypeSystemClang::CreateVariableDeclaration(
clang::DeclContext *decl_context, OptionalClangModuleID owning_module,
const char *name, clang::QualType type) {
if (decl_context) {
clang::VarDecl *var_decl =
clang::VarDecl::CreateDeserialized(getASTContext(), 0);
var_decl->setDeclContext(decl_context);
if (name && name[0])
var_decl->setDeclName(&getASTContext().Idents.getOwn(name));
var_decl->setType(type);
SetOwningModule(var_decl, owning_module);
var_decl->setAccess(clang::AS_public);
decl_context->addDecl(var_decl);
return var_decl;
}
return nullptr;
}
lldb::opaque_compiler_type_t
TypeSystemClang::GetOpaqueCompilerType(clang::ASTContext *ast,
lldb::BasicType basic_type) {
switch (basic_type) {
case eBasicTypeVoid:
return ast->VoidTy.getAsOpaquePtr();
case eBasicTypeChar:
return ast->CharTy.getAsOpaquePtr();
case eBasicTypeSignedChar:
return ast->SignedCharTy.getAsOpaquePtr();
case eBasicTypeUnsignedChar:
return ast->UnsignedCharTy.getAsOpaquePtr();
case eBasicTypeWChar:
return ast->getWCharType().getAsOpaquePtr();
case eBasicTypeSignedWChar:
return ast->getSignedWCharType().getAsOpaquePtr();
case eBasicTypeUnsignedWChar:
return ast->getUnsignedWCharType().getAsOpaquePtr();
case eBasicTypeChar8:
return ast->Char8Ty.getAsOpaquePtr();
case eBasicTypeChar16:
return ast->Char16Ty.getAsOpaquePtr();
case eBasicTypeChar32:
return ast->Char32Ty.getAsOpaquePtr();
case eBasicTypeShort:
return ast->ShortTy.getAsOpaquePtr();
case eBasicTypeUnsignedShort:
return ast->UnsignedShortTy.getAsOpaquePtr();
case eBasicTypeInt:
return ast->IntTy.getAsOpaquePtr();
case eBasicTypeUnsignedInt:
return ast->UnsignedIntTy.getAsOpaquePtr();
case eBasicTypeLong:
return ast->LongTy.getAsOpaquePtr();
case eBasicTypeUnsignedLong:
return ast->UnsignedLongTy.getAsOpaquePtr();
case eBasicTypeLongLong:
return ast->LongLongTy.getAsOpaquePtr();
case eBasicTypeUnsignedLongLong:
return ast->UnsignedLongLongTy.getAsOpaquePtr();
case eBasicTypeInt128:
return ast->Int128Ty.getAsOpaquePtr();
case eBasicTypeUnsignedInt128:
return ast->UnsignedInt128Ty.getAsOpaquePtr();
case eBasicTypeBool:
return ast->BoolTy.getAsOpaquePtr();
case eBasicTypeHalf:
return ast->HalfTy.getAsOpaquePtr();
case eBasicTypeFloat:
return ast->FloatTy.getAsOpaquePtr();
case eBasicTypeDouble:
return ast->DoubleTy.getAsOpaquePtr();
case eBasicTypeLongDouble:
return ast->LongDoubleTy.getAsOpaquePtr();
case eBasicTypeFloatComplex:
return ast->getComplexType(ast->FloatTy).getAsOpaquePtr();
case eBasicTypeDoubleComplex:
return ast->getComplexType(ast->DoubleTy).getAsOpaquePtr();
case eBasicTypeLongDoubleComplex:
return ast->getComplexType(ast->LongDoubleTy).getAsOpaquePtr();
case eBasicTypeObjCID:
return ast->getObjCIdType().getAsOpaquePtr();
case eBasicTypeObjCClass:
return ast->getObjCClassType().getAsOpaquePtr();
case eBasicTypeObjCSel:
return ast->getObjCSelType().getAsOpaquePtr();
case eBasicTypeNullPtr:
return ast->NullPtrTy.getAsOpaquePtr();
default:
return nullptr;
}
}
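// Illustration (editor's sketch, assuming a valid clang::ASTContext `ast`):
// the mapping is a direct table lookup, e.g.
//
//   GetOpaqueCompilerType(&ast, lldb::eBasicTypeFloat)
//       == ast.FloatTy.getAsOpaquePtr()
//
// and any unhandled lldb::BasicType value falls through to nullptr.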
#pragma mark Function Types
clang::DeclarationName
TypeSystemClang::GetDeclarationName(llvm::StringRef name,
const CompilerType &function_clang_type) {
clang::OverloadedOperatorKind op_kind = clang::NUM_OVERLOADED_OPERATORS;
if (!IsOperator(name, op_kind) || op_kind == clang::NUM_OVERLOADED_OPERATORS)
return DeclarationName(&getASTContext().Idents.get(
name)); // Not operator, but a regular function.
// Check the number of operator parameters. We have sometimes seen bad DWARF
// that doesn't correctly describe operators, and if we try to create a method
// from it and add it to the class, clang will assert and crash, so we need to
// make sure things are acceptable.
clang::QualType method_qual_type(ClangUtil::GetQualType(function_clang_type));
const clang::FunctionProtoType *function_type =
llvm::dyn_cast<clang::FunctionProtoType>(method_qual_type.getTypePtr());
if (function_type == nullptr)
return clang::DeclarationName();
const bool is_method = false;
const unsigned int num_params = function_type->getNumParams();
if (!TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
is_method, op_kind, num_params))
return clang::DeclarationName();
return getASTContext().DeclarationNames.getCXXOperatorName(op_kind);
}
PrintingPolicy TypeSystemClang::GetTypePrintingPolicy() {
clang::PrintingPolicy printing_policy(getASTContext().getPrintingPolicy());
printing_policy.SuppressTagKeyword = true;
// Inline namespaces are important for some type formatters (e.g., libc++
// and libstdc++ are differentiated by their inline namespaces).
printing_policy.SuppressInlineNamespace = false;
printing_policy.SuppressUnwrittenScope = false;
// Default arguments are also always important for type formatters. Otherwise
// we would need to always specify two type names for the setups where we do
// know the default arguments and where we don't know default arguments.
//
// For example, without this we would need to have formatters for both:
// std::basic_string<char>
// and
// std::basic_string<char, std::char_traits<char>, std::allocator<char> >
// to support setups where LLDB was able to reconstruct default arguments
// (and we then would have suppressed them from the type name) and also setups
// where LLDB wasn't able to reconstruct the default arguments.
printing_policy.SuppressDefaultTemplateArgs = false;
return printing_policy;
}
std::string TypeSystemClang::GetTypeNameForDecl(const NamedDecl *named_decl) {
clang::PrintingPolicy printing_policy = GetTypePrintingPolicy();
std::string result;
llvm::raw_string_ostream os(result);
named_decl->printQualifiedName(os, printing_policy);
return result;
}
FunctionDecl *TypeSystemClang::CreateFunctionDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
llvm::StringRef name, const CompilerType &function_clang_type,
clang::StorageClass storage, bool is_inline) {
FunctionDecl *func_decl = nullptr;
ASTContext &ast = getASTContext();
if (!decl_ctx)
decl_ctx = ast.getTranslationUnitDecl();
const bool hasWrittenPrototype = true;
const bool isConstexprSpecified = false;
clang::DeclarationName declarationName =
GetDeclarationName(name, function_clang_type);
func_decl = FunctionDecl::CreateDeserialized(ast, 0);
func_decl->setDeclContext(decl_ctx);
func_decl->setDeclName(declarationName);
func_decl->setType(ClangUtil::GetQualType(function_clang_type));
func_decl->setStorageClass(storage);
func_decl->setInlineSpecified(is_inline);
func_decl->setHasWrittenPrototype(hasWrittenPrototype);
func_decl->setConstexprKind(isConstexprSpecified
? ConstexprSpecKind::Constexpr
: ConstexprSpecKind::Unspecified);
SetOwningModule(func_decl, owning_module);
decl_ctx->addDecl(func_decl);
VerifyDecl(func_decl);
return func_decl;
}
CompilerType
TypeSystemClang::CreateFunctionType(const CompilerType &result_type,
const CompilerType *args, unsigned num_args,
bool is_variadic, unsigned type_quals,
clang::CallingConv cc) {
if (!result_type || !ClangUtil::IsClangType(result_type))
return CompilerType(); // invalid return type
std::vector<QualType> qual_type_args;
if (num_args > 0 && args == nullptr)
return CompilerType(); // invalid argument array passed in
// Verify that all arguments are valid and the right type
for (unsigned i = 0; i < num_args; ++i) {
if (args[i]) {
// Make sure we have a clang type in args[i] and not a type from another
// language whose name might match
const bool is_clang_type = ClangUtil::IsClangType(args[i]);
lldbassert(is_clang_type);
if (is_clang_type)
qual_type_args.push_back(ClangUtil::GetQualType(args[i]));
else
return CompilerType(); // invalid argument type (must be a clang type)
} else
return CompilerType(); // invalid argument type (empty)
}
// TODO: Detect calling convention in DWARF?
FunctionProtoType::ExtProtoInfo proto_info;
proto_info.ExtInfo = cc;
proto_info.Variadic = is_variadic;
proto_info.ExceptionSpec = EST_None;
proto_info.TypeQuals = clang::Qualifiers::fromFastMask(type_quals);
proto_info.RefQualifier = RQ_None;
return GetType(getASTContext().getFunctionType(
ClangUtil::GetQualType(result_type), qual_type_args, proto_info));
}
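// Usage sketch (editor's illustration; `ts` is a TypeSystemClang instance and
// `int_type` a CompilerType wrapping "int", both assumed):
//
//   CompilerType args[] = {int_type, int_type};
//   CompilerType fn = ts.CreateFunctionType(int_type, args, /*num_args=*/2,
//                                           /*is_variadic=*/false,
//                                           /*type_quals=*/0, clang::CC_C);
//   // fn now wraps the prototype "int (int, int)".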
ParmVarDecl *TypeSystemClang::CreateParameterDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
const char *name, const CompilerType &param_type, int storage,
bool add_decl) {
ASTContext &ast = getASTContext();
auto *decl = ParmVarDecl::CreateDeserialized(ast, 0);
decl->setDeclContext(decl_ctx);
if (name && name[0])
decl->setDeclName(&ast.Idents.get(name));
decl->setType(ClangUtil::GetQualType(param_type));
decl->setStorageClass(static_cast<clang::StorageClass>(storage));
SetOwningModule(decl, owning_module);
if (add_decl)
decl_ctx->addDecl(decl);
return decl;
}
void TypeSystemClang::SetFunctionParameters(
FunctionDecl *function_decl, llvm::ArrayRef<ParmVarDecl *> params) {
if (function_decl)
function_decl->setParams(params);
}
CompilerType
TypeSystemClang::CreateBlockPointerType(const CompilerType &function_type) {
QualType block_type = m_ast_up->getBlockPointerType(
clang::QualType::getFromOpaquePtr(function_type.GetOpaqueQualType()));
return GetType(block_type);
}
#pragma mark Array Types
CompilerType TypeSystemClang::CreateArrayType(const CompilerType &element_type,
size_t element_count,
bool is_vector) {
if (element_type.IsValid()) {
ASTContext &ast = getASTContext();
if (is_vector) {
return GetType(ast.getExtVectorType(ClangUtil::GetQualType(element_type),
element_count));
} else {
llvm::APInt ap_element_count(64, element_count);
if (element_count == 0) {
return GetType(ast.getIncompleteArrayType(
ClangUtil::GetQualType(element_type), clang::ArrayType::Normal, 0));
} else {
return GetType(ast.getConstantArrayType(
ClangUtil::GetQualType(element_type), ap_element_count, nullptr,
clang::ArrayType::Normal, 0));
}
}
}
return CompilerType();
}
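// Usage sketch (hypothetical `ts` and `float_type`, as in the sketch after
// CreateFunctionType):
//
//   ts.CreateArrayType(float_type, 4, /*is_vector=*/false); // float[4]
//   ts.CreateArrayType(float_type, 0, /*is_vector=*/false); // float[] (incomplete)
//   ts.CreateArrayType(float_type, 4, /*is_vector=*/true);  // 4-element ext vector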
CompilerType TypeSystemClang::CreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed) {
CompilerType type;
if (!type_name.IsEmpty() &&
(type = GetTypeForIdentifier<clang::CXXRecordDecl>(type_name))
.IsValid()) {
lldbassert(0 && "Trying to create a type for an existing name");
return type;
}
type = CreateRecordType(nullptr, OptionalClangModuleID(), lldb::eAccessPublic,
type_name.GetCString(), clang::TTK_Struct,
lldb::eLanguageTypeC);
StartTagDeclarationDefinition(type);
for (const auto &field : type_fields)
AddFieldToRecordType(type, field.first, field.second, lldb::eAccessPublic,
0);
if (packed)
SetIsPacked(type);
CompleteTagDeclarationDefinition(type);
return type;
}
CompilerType TypeSystemClang::GetOrCreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed) {
CompilerType type;
if ((type = GetTypeForIdentifier<clang::CXXRecordDecl>(type_name)).IsValid())
return type;
return CreateStructForIdentifier(type_name, type_fields, packed);
}
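// Usage sketch (hypothetical names) showing how a synthetic helper struct can
// be built from field name/type pairs:
//
//   CompilerType pair = ts.GetOrCreateStructForIdentifier(
//       ConstString("$__lldb_pair"),
//       {{"first", int_type}, {"second", int_type}},
//       /*packed=*/false);
//
// A second call with the same name returns the cached type instead of hitting
// the lldbassert in CreateStructForIdentifier.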
#pragma mark Enumeration Types
CompilerType TypeSystemClang::CreateEnumerationType(
llvm::StringRef name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module, const Declaration &decl,
const CompilerType &integer_clang_type, bool is_scoped) {
// TODO: Do something intelligent with the Declaration object passed in
// like maybe filling in the SourceLocation with it...
ASTContext &ast = getASTContext();
// TODO: ask about these...
// const bool IsFixed = false;
EnumDecl *enum_decl = EnumDecl::CreateDeserialized(ast, 0);
enum_decl->setDeclContext(decl_ctx);
if (!name.empty())
enum_decl->setDeclName(&ast.Idents.get(name));
enum_decl->setScoped(is_scoped);
enum_decl->setScopedUsingClassTag(is_scoped);
enum_decl->setFixed(false);
SetOwningModule(enum_decl, owning_module);
if (decl_ctx)
decl_ctx->addDecl(enum_decl);
// TODO: check if we should be setting the promotion type too?
enum_decl->setIntegerType(ClangUtil::GetQualType(integer_clang_type));
enum_decl->setAccess(AS_public); // TODO respect what's in the debug info
return GetType(ast.getTagDeclType(enum_decl));
}
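// Usage sketch (hypothetical `ts`, `decl_ctx` and `uint_type`): creating a
// scoped enum backed by "unsigned int" looks roughly like
//
//   CompilerType color = ts.CreateEnumerationType(
//       "Color", decl_ctx, OptionalClangModuleID(), Declaration(),
//       uint_type, /*is_scoped=*/true);
//
// which corresponds to "enum class Color : unsigned int" (modulo the
// fixed-underlying-type flag, which is left false above).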
CompilerType TypeSystemClang::GetIntTypeFromBitSize(size_t bit_size,
bool is_signed) {
clang::ASTContext &ast = getASTContext();
if (is_signed) {
if (bit_size == ast.getTypeSize(ast.SignedCharTy))
return GetType(ast.SignedCharTy);
if (bit_size == ast.getTypeSize(ast.ShortTy))
return GetType(ast.ShortTy);
if (bit_size == ast.getTypeSize(ast.IntTy))
return GetType(ast.IntTy);
if (bit_size == ast.getTypeSize(ast.LongTy))
return GetType(ast.LongTy);
if (bit_size == ast.getTypeSize(ast.LongLongTy))
return GetType(ast.LongLongTy);
if (bit_size == ast.getTypeSize(ast.Int128Ty))
return GetType(ast.Int128Ty);
} else {
if (bit_size == ast.getTypeSize(ast.UnsignedCharTy))
return GetType(ast.UnsignedCharTy);
if (bit_size == ast.getTypeSize(ast.UnsignedShortTy))
return GetType(ast.UnsignedShortTy);
if (bit_size == ast.getTypeSize(ast.UnsignedIntTy))
return GetType(ast.UnsignedIntTy);
if (bit_size == ast.getTypeSize(ast.UnsignedLongTy))
return GetType(ast.UnsignedLongTy);
if (bit_size == ast.getTypeSize(ast.UnsignedLongLongTy))
return GetType(ast.UnsignedLongLongTy);
if (bit_size == ast.getTypeSize(ast.UnsignedInt128Ty))
return GetType(ast.UnsignedInt128Ty);
}
return CompilerType();
}
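// Illustrative results on a typical LP64 target (sizes come from the
// ASTContext, so the answers are target-dependent):
//   GetIntTypeFromBitSize(32, /*is_signed=*/true)  -> int
//   GetIntTypeFromBitSize(64, /*is_signed=*/false) -> unsigned long
//                                                     (checked before long long)
//   GetIntTypeFromBitSize(24, /*is_signed=*/true)  -> invalid CompilerType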
CompilerType TypeSystemClang::GetPointerSizedIntType(bool is_signed) {
return GetIntTypeFromBitSize(
getASTContext().getTypeSize(getASTContext().VoidPtrTy), is_signed);
}
void TypeSystemClang::DumpDeclContextHiearchy(clang::DeclContext *decl_ctx) {
if (decl_ctx) {
DumpDeclContextHiearchy(decl_ctx->getParent());
clang::NamedDecl *named_decl = llvm::dyn_cast<clang::NamedDecl>(decl_ctx);
if (named_decl) {
printf("%20s: %s\n", decl_ctx->getDeclKindName(),
named_decl->getDeclName().getAsString().c_str());
} else {
printf("%20s\n", decl_ctx->getDeclKindName());
}
}
}
void TypeSystemClang::DumpDeclHiearchy(clang::Decl *decl) {
if (decl == nullptr)
return;
DumpDeclContextHiearchy(decl->getDeclContext());
clang::RecordDecl *record_decl = llvm::dyn_cast<clang::RecordDecl>(decl);
if (record_decl) {
printf("%20s: %s%s\n", decl->getDeclKindName(),
record_decl->getDeclName().getAsString().c_str(),
record_decl->isInjectedClassName() ? " (injected class name)" : "");
} else {
clang::NamedDecl *named_decl = llvm::dyn_cast<clang::NamedDecl>(decl);
if (named_decl) {
printf("%20s: %s\n", decl->getDeclKindName(),
named_decl->getDeclName().getAsString().c_str());
} else {
printf("%20s\n", decl->getDeclKindName());
}
}
}
bool TypeSystemClang::DeclsAreEquivalent(clang::Decl *lhs_decl,
clang::Decl *rhs_decl) {
if (lhs_decl && rhs_decl) {
// Make sure the decl kinds match first
const clang::Decl::Kind lhs_decl_kind = lhs_decl->getKind();
const clang::Decl::Kind rhs_decl_kind = rhs_decl->getKind();
if (lhs_decl_kind == rhs_decl_kind) {
// Now check that the decl contexts' kinds are all equivalent before we
// have to check any names of the decl contexts...
clang::DeclContext *lhs_decl_ctx = lhs_decl->getDeclContext();
clang::DeclContext *rhs_decl_ctx = rhs_decl->getDeclContext();
if (lhs_decl_ctx && rhs_decl_ctx) {
while (true) {
if (lhs_decl_ctx && rhs_decl_ctx) {
const clang::Decl::Kind lhs_decl_ctx_kind =
lhs_decl_ctx->getDeclKind();
const clang::Decl::Kind rhs_decl_ctx_kind =
rhs_decl_ctx->getDeclKind();
if (lhs_decl_ctx_kind == rhs_decl_ctx_kind) {
lhs_decl_ctx = lhs_decl_ctx->getParent();
rhs_decl_ctx = rhs_decl_ctx->getParent();
if (lhs_decl_ctx == nullptr && rhs_decl_ctx == nullptr)
break;
} else
return false;
} else
return false;
}
// Now make sure the name of the decls match
clang::NamedDecl *lhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(lhs_decl);
clang::NamedDecl *rhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(rhs_decl);
if (lhs_named_decl && rhs_named_decl) {
clang::DeclarationName lhs_decl_name = lhs_named_decl->getDeclName();
clang::DeclarationName rhs_decl_name = rhs_named_decl->getDeclName();
if (lhs_decl_name.getNameKind() == rhs_decl_name.getNameKind()) {
if (lhs_decl_name.getAsString() != rhs_decl_name.getAsString())
return false;
} else
return false;
} else
return false;
// We know that the decl context kinds all match, so now we need to
// make sure the names match as well
lhs_decl_ctx = lhs_decl->getDeclContext();
rhs_decl_ctx = rhs_decl->getDeclContext();
while (true) {
switch (lhs_decl_ctx->getDeclKind()) {
case clang::Decl::TranslationUnit:
// We don't care about the translation unit names
return true;
default: {
clang::NamedDecl *lhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(lhs_decl_ctx);
clang::NamedDecl *rhs_named_decl =
llvm::dyn_cast<clang::NamedDecl>(rhs_decl_ctx);
if (lhs_named_decl && rhs_named_decl) {
clang::DeclarationName lhs_decl_name =
lhs_named_decl->getDeclName();
clang::DeclarationName rhs_decl_name =
rhs_named_decl->getDeclName();
if (lhs_decl_name.getNameKind() == rhs_decl_name.getNameKind()) {
if (lhs_decl_name.getAsString() != rhs_decl_name.getAsString())
return false;
} else
return false;
} else
return false;
} break;
}
lhs_decl_ctx = lhs_decl_ctx->getParent();
rhs_decl_ctx = rhs_decl_ctx->getParent();
}
}
}
}
return false;
}
bool TypeSystemClang::GetCompleteDecl(clang::ASTContext *ast,
clang::Decl *decl) {
if (!decl)
return false;
ExternalASTSource *ast_source = ast->getExternalSource();
if (!ast_source)
return false;
if (clang::TagDecl *tag_decl = llvm::dyn_cast<clang::TagDecl>(decl)) {
if (tag_decl->isCompleteDefinition())
return true;
if (!tag_decl->hasExternalLexicalStorage())
return false;
ast_source->CompleteType(tag_decl);
return !tag_decl->getTypeForDecl()->isIncompleteType();
} else if (clang::ObjCInterfaceDecl *objc_interface_decl =
llvm::dyn_cast<clang::ObjCInterfaceDecl>(decl)) {
if (objc_interface_decl->getDefinition())
return true;
if (!objc_interface_decl->hasExternalLexicalStorage())
return false;
ast_source->CompleteType(objc_interface_decl);
return !objc_interface_decl->getTypeForDecl()->isIncompleteType();
} else {
return false;
}
}
void TypeSystemClang::SetMetadataAsUserID(const clang::Decl *decl,
user_id_t user_id) {
ClangASTMetadata meta_data;
meta_data.SetUserID(user_id);
SetMetadata(decl, meta_data);
}
void TypeSystemClang::SetMetadataAsUserID(const clang::Type *type,
user_id_t user_id) {
ClangASTMetadata meta_data;
meta_data.SetUserID(user_id);
SetMetadata(type, meta_data);
}
void TypeSystemClang::SetMetadata(const clang::Decl *object,
ClangASTMetadata &metadata) {
m_decl_metadata[object] = metadata;
}
void TypeSystemClang::SetMetadata(const clang::Type *object,
ClangASTMetadata &metadata) {
m_type_metadata[object] = metadata;
}
ClangASTMetadata *TypeSystemClang::GetMetadata(const clang::Decl *object) {
auto It = m_decl_metadata.find(object);
if (It != m_decl_metadata.end())
return &It->second;
return nullptr;
}
ClangASTMetadata *TypeSystemClang::GetMetadata(const clang::Type *object) {
auto It = m_type_metadata.find(object);
if (It != m_type_metadata.end())
return &It->second;
return nullptr;
}
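// Usage sketch (hypothetical `ts` and `decl`; GetUserID is assumed to be the
// getter counterpart of the SetUserID call used above): the metadata maps tie
// LLDB bookkeeping to clang AST nodes, e.g.
//
//   ts.SetMetadataAsUserID(decl, /*user_id=*/0x1234);
//   if (ClangASTMetadata *md = ts.GetMetadata(decl))
//     user_id_t uid = md->GetUserID(); // 0x1234
//
// GetMetadata returns nullptr for nodes that never had metadata attached.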
void TypeSystemClang::SetCXXRecordDeclAccess(const clang::CXXRecordDecl *object,
clang::AccessSpecifier access) {
if (access == clang::AccessSpecifier::AS_none)
m_cxx_record_decl_access.erase(object);
else
m_cxx_record_decl_access[object] = access;
}
clang::AccessSpecifier
TypeSystemClang::GetCXXRecordDeclAccess(const clang::CXXRecordDecl *object) {
auto It = m_cxx_record_decl_access.find(object);
if (It != m_cxx_record_decl_access.end())
return It->second;
return clang::AccessSpecifier::AS_none;
}
clang::DeclContext *
TypeSystemClang::GetDeclContextForType(const CompilerType &type) {
return GetDeclContextForType(ClangUtil::GetQualType(type));
}
/// Aggressively desugar the provided type, skipping past various kinds of
/// syntactic sugar and other constructs one typically wants to ignore.
/// The \p mask argument allows one to skip certain kinds of simplifications,
/// when one wishes to handle a certain kind of type directly.
static QualType
RemoveWrappingTypes(QualType type, ArrayRef<clang::Type::TypeClass> mask = {}) {
while (true) {
if (find(mask, type->getTypeClass()) != mask.end())
return type;
switch (type->getTypeClass()) {
// This is not fully correct as _Atomic is more than sugar, but it is
// sufficient for the purposes we care about.
case clang::Type::Atomic:
type = cast<clang::AtomicType>(type)->getValueType();
break;
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::SubstTemplateTypeParm:
case clang::Type::TemplateSpecialization:
case clang::Type::Typedef:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
type = type->getLocallyUnqualifiedSingleStepDesugaredType();
break;
default:
return type;
}
}
}
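// Example of the desugaring above (hypothetical type): given
//   typedef unsigned long size_type;
// a QualType for "size_type" is peeled through the Typedef (and any
// Elaborated/Paren nodes) down to the builtin "unsigned long", while
//   RemoveWrappingTypes(t, {clang::Type::Typedef})
// stops as soon as it reaches the Typedef node and returns "size_type" itself.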
clang::DeclContext *
TypeSystemClang::GetDeclContextForType(clang::QualType type) {
if (type.isNull())
return nullptr;
clang::QualType qual_type = RemoveWrappingTypes(type.getCanonicalType());
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::ObjCInterface:
return llvm::cast<clang::ObjCObjectType>(qual_type.getTypePtr())
->getInterface();
case clang::Type::ObjCObjectPointer:
return GetDeclContextForType(
llvm::cast<clang::ObjCObjectPointerType>(qual_type.getTypePtr())
->getPointeeType());
case clang::Type::Record:
return llvm::cast<clang::RecordType>(qual_type)->getDecl();
case clang::Type::Enum:
return llvm::cast<clang::EnumType>(qual_type)->getDecl();
default:
break;
}
// No DeclContext in this type...
return nullptr;
}
static bool GetCompleteQualType(clang::ASTContext *ast,
clang::QualType qual_type,
bool allow_completion = true) {
qual_type = RemoveWrappingTypes(qual_type);
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::ConstantArray:
case clang::Type::IncompleteArray:
case clang::Type::VariableArray: {
const clang::ArrayType *array_type =
llvm::dyn_cast<clang::ArrayType>(qual_type.getTypePtr());
if (array_type)
return GetCompleteQualType(ast, array_type->getElementType(),
allow_completion);
} break;
case clang::Type::Record: {
clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
if (cxx_record_decl->hasExternalLexicalStorage()) {
const bool is_complete = cxx_record_decl->isCompleteDefinition();
const bool fields_loaded =
cxx_record_decl->hasLoadedFieldsFromExternalStorage();
if (is_complete && fields_loaded)
return true;
if (!allow_completion)
return false;
// Call the field_begin() accessor to force it to use the external
// source to load the fields...
clang::ExternalASTSource *external_ast_source =
ast->getExternalSource();
if (external_ast_source) {
external_ast_source->CompleteType(cxx_record_decl);
if (cxx_record_decl->isCompleteDefinition()) {
cxx_record_decl->field_begin();
cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true);
}
}
}
}
const clang::TagType *tag_type =
llvm::cast<clang::TagType>(qual_type.getTypePtr());
return !tag_type->isIncompleteType();
} break;
case clang::Type::Enum: {
const clang::TagType *tag_type =
llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr());
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (tag_decl) {
if (tag_decl->getDefinition())
return true;
if (!allow_completion)
return false;
if (tag_decl->hasExternalLexicalStorage()) {
if (ast) {
clang::ExternalASTSource *external_ast_source =
ast->getExternalSource();
if (external_ast_source) {
external_ast_source->CompleteType(tag_decl);
return !tag_type->isIncompleteType();
}
}
}
return false;
}
}
} break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
// We currently can't complete Objective-C types through the newly added
// ASTContext because it only supports TagDecl objects right now...
if (class_interface_decl) {
if (class_interface_decl->getDefinition())
return true;
if (!allow_completion)
return false;
if (class_interface_decl->hasExternalLexicalStorage()) {
if (ast) {
clang::ExternalASTSource *external_ast_source =
ast->getExternalSource();
if (external_ast_source) {
external_ast_source->CompleteType(class_interface_decl);
return !objc_class_type->isIncompleteType();
}
}
}
return false;
}
}
} break;
case clang::Type::Attributed:
return GetCompleteQualType(
ast, llvm::cast<clang::AttributedType>(qual_type)->getModifiedType(),
allow_completion);
default:
break;
}
return true;
}
static clang::ObjCIvarDecl::AccessControl
ConvertAccessTypeToObjCIvarAccessControl(AccessType access) {
switch (access) {
case eAccessNone:
return clang::ObjCIvarDecl::None;
case eAccessPublic:
return clang::ObjCIvarDecl::Public;
case eAccessPrivate:
return clang::ObjCIvarDecl::Private;
case eAccessProtected:
return clang::ObjCIvarDecl::Protected;
case eAccessPackage:
return clang::ObjCIvarDecl::Package;
}
return clang::ObjCIvarDecl::None;
}
// Tests
#ifndef NDEBUG
bool TypeSystemClang::Verify(lldb::opaque_compiler_type_t type) {
return !type || llvm::isa<clang::Type>(GetQualType(type).getTypePtr());
}
#endif
bool TypeSystemClang::IsAggregateType(lldb::opaque_compiler_type_t type) {
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
case clang::Type::ConstantArray:
case clang::Type::ExtVector:
case clang::Type::Vector:
case clang::Type::Record:
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
return true;
default:
break;
}
// The clang type is not an aggregate.
return false;
}
bool TypeSystemClang::IsAnonymousType(lldb::opaque_compiler_type_t type) {
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
if (const clang::RecordType *record_type =
llvm::dyn_cast_or_null<clang::RecordType>(
qual_type.getTypePtrOrNull())) {
if (const clang::RecordDecl *record_decl = record_type->getDecl()) {
return record_decl->isAnonymousStructOrUnion();
}
}
break;
}
default:
break;
}
// The clang type is not an anonymous struct or union.
return false;
}
bool TypeSystemClang::IsArrayType(lldb::opaque_compiler_type_t type,
CompilerType *element_type_ptr,
uint64_t *size, bool *is_incomplete) {
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::ConstantArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::ConstantArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = llvm::cast<clang::ConstantArrayType>(qual_type)
->getSize()
.getLimitedValue(ULLONG_MAX);
if (is_incomplete)
*is_incomplete = false;
return true;
case clang::Type::IncompleteArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::IncompleteArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = true;
return true;
case clang::Type::VariableArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::VariableArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return true;
case clang::Type::DependentSizedArray:
if (element_type_ptr)
element_type_ptr->SetCompilerType(
this, llvm::cast<clang::DependentSizedArrayType>(qual_type)
->getElementType()
.getAsOpaquePtr());
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return true;
}
if (element_type_ptr)
element_type_ptr->Clear();
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return false;
}
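// Illustrative outcomes (hypothetical `ts`; `arr10` wraps "int[10]" and
// `arr_inc` wraps "int[]"):
//
//   CompilerType elem; uint64_t size; bool incomplete;
//   ts.IsArrayType(arr10.GetOpaqueQualType(), &elem, &size, &incomplete);
//   // true; elem == int, size == 10, incomplete == false
//   ts.IsArrayType(arr_inc.GetOpaqueQualType(), &elem, &size, &incomplete);
//   // true; size == 0, incomplete == true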
bool TypeSystemClang::IsVectorType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Vector: {
const clang::VectorType *vector_type =
qual_type->getAs<clang::VectorType>();
if (vector_type) {
if (size)
*size = vector_type->getNumElements();
if (element_type)
*element_type = GetType(vector_type->getElementType());
}
return true;
} break;
case clang::Type::ExtVector: {
const clang::ExtVectorType *ext_vector_type =
qual_type->getAs<clang::ExtVectorType>();
if (ext_vector_type) {
if (size)
*size = ext_vector_type->getNumElements();
if (element_type)
*element_type =
CompilerType(this, ext_vector_type->getElementType().getAsOpaquePtr());
}
return true;
}
default:
break;
}
return false;
}
bool TypeSystemClang::IsRuntimeGeneratedType(
lldb::opaque_compiler_type_t type) {
clang::DeclContext *decl_ctx = GetDeclContextForType(GetQualType(type));
if (!decl_ctx)
return false;
if (!llvm::isa<clang::ObjCInterfaceDecl>(decl_ctx))
return false;
clang::ObjCInterfaceDecl *result_iface_decl =
llvm::dyn_cast<clang::ObjCInterfaceDecl>(decl_ctx);
ClangASTMetadata *ast_metadata = GetMetadata(result_iface_decl);
if (!ast_metadata)
return false;
return (ast_metadata->GetISAPtr() != 0);
}
bool TypeSystemClang::IsCharType(lldb::opaque_compiler_type_t type) {
return GetQualType(type).getUnqualifiedType()->isCharType();
}
bool TypeSystemClang::IsCompleteType(lldb::opaque_compiler_type_t type) {
// If the type hasn't been lazily completed yet, complete it now so that we
// can give the caller an accurate answer whether the type actually has a
// definition. Without completing the type now we would just tell the user
// the current (internal) completeness state of the type and most users don't
// care (or even know) about this behavior.
const bool allow_completion = true;
return GetCompleteQualType(&getASTContext(), GetQualType(type),
allow_completion);
}
bool TypeSystemClang::IsConst(lldb::opaque_compiler_type_t type) {
return GetQualType(type).isConstQualified();
}
bool TypeSystemClang::IsCStringType(lldb::opaque_compiler_type_t type,
uint32_t &length) {
CompilerType pointee_or_element_clang_type;
length = 0;
Flags type_flags(GetTypeInfo(type, &pointee_or_element_clang_type));
if (!pointee_or_element_clang_type.IsValid())
return false;
if (type_flags.AnySet(eTypeIsArray | eTypeIsPointer)) {
if (pointee_or_element_clang_type.IsCharType()) {
if (type_flags.Test(eTypeIsArray)) {
// We know the size of the array and it could be a C string since it is
// an array of characters
length = llvm::cast<clang::ConstantArrayType>(
GetCanonicalQualType(type).getTypePtr())
->getSize()
.getLimitedValue();
}
return true;
}
}
return false;
}
bool TypeSystemClang::IsFunctionType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
if (qual_type->isFunctionType()) {
return true;
}
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
if (reference_type)
return IsFunctionType(
reference_type->getPointeeType().getAsOpaquePtr());
} break;
}
}
return false;
}
// Used to detect "Homogeneous Floating-point Aggregates"
uint32_t
TypeSystemClang::IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
CompilerType *base_type_ptr) {
if (!type)
return 0;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
if (cxx_record_decl->getNumBases() || cxx_record_decl->isDynamicClass())
return 0;
}
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
if (record_type) {
const clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl) {
// We are looking for a structure that contains only floating point
// types
clang::RecordDecl::field_iterator field_pos,
field_end = record_decl->field_end();
uint32_t num_fields = 0;
bool is_hva = false;
bool is_hfa = false;
clang::QualType base_qual_type;
uint64_t base_bitwidth = 0;
for (field_pos = record_decl->field_begin(); field_pos != field_end;
++field_pos) {
clang::QualType field_qual_type = field_pos->getType();
// Measure the field's own type here, not the enclosing record.
uint64_t field_bitwidth =
getASTContext().getTypeSize(field_qual_type);
if (field_qual_type->isFloatingType()) {
if (field_qual_type->isComplexType())
return 0;
else {
if (num_fields == 0)
base_qual_type = field_qual_type;
else {
if (is_hva)
return 0;
is_hfa = true;
if (field_qual_type.getTypePtr() !=
base_qual_type.getTypePtr())
return 0;
}
}
} else if (field_qual_type->isVectorType() ||
field_qual_type->isExtVectorType()) {
if (num_fields == 0) {
base_qual_type = field_qual_type;
base_bitwidth = field_bitwidth;
} else {
if (is_hfa)
return 0;
is_hva = true;
if (base_bitwidth != field_bitwidth)
return 0;
if (field_qual_type.getTypePtr() != base_qual_type.getTypePtr())
return 0;
}
} else
return 0;
++num_fields;
}
if (base_type_ptr)
*base_type_ptr = CompilerType(this, base_qual_type.getAsOpaquePtr());
return num_fields;
}
}
}
break;
default:
break;
}
return 0;
}
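// Illustrative classification (follows the field walk above):
//
//   struct Vec3 { float x, y, z; };     // HFA: returns 3, base type "float"
//   struct Mixed { float x; int y; };   // non-floating member: returns 0
//   struct Derived : Vec3 { };          // has a base class: returns 0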
size_t TypeSystemClang::GetNumberOfFunctionArguments(
lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(qual_type.getTypePtr());
if (func)
return func->getNumParams();
}
return 0;
}
CompilerType
TypeSystemClang::GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type,
const size_t index) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(qual_type.getTypePtr());
if (func) {
if (index < func->getNumParams())
return CompilerType(this, func->getParamType(index).getAsOpaquePtr());
}
}
return CompilerType();
}
bool TypeSystemClang::IsFunctionPointerType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
if (qual_type->isFunctionPointerType())
return true;
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
if (reference_type)
return IsFunctionPointerType(
reference_type->getPointeeType().getAsOpaquePtr());
} break;
}
}
return false;
}
bool TypeSystemClang::IsBlockPointerType(
lldb::opaque_compiler_type_t type,
CompilerType *function_pointer_type_ptr) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
if (qual_type->isBlockPointerType()) {
if (function_pointer_type_ptr) {
const clang::BlockPointerType *block_pointer_type =
qual_type->castAs<clang::BlockPointerType>();
QualType pointee_type = block_pointer_type->getPointeeType();
QualType function_pointer_type = m_ast_up->getPointerType(pointee_type);
*function_pointer_type_ptr =
CompilerType(this, function_pointer_type.getAsOpaquePtr());
}
return true;
}
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
default:
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
if (reference_type)
return IsBlockPointerType(
reference_type->getPointeeType().getAsOpaquePtr(),
function_pointer_type_ptr);
} break;
}
}
return false;
}
bool TypeSystemClang::IsIntegerType(lldb::opaque_compiler_type_t type,
bool &is_signed) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::BuiltinType *builtin_type =
llvm::dyn_cast<clang::BuiltinType>(qual_type->getCanonicalTypeInternal());
if (builtin_type) {
if (builtin_type->isInteger()) {
is_signed = builtin_type->isSignedInteger();
return true;
}
}
return false;
}
bool TypeSystemClang::IsEnumerationType(lldb::opaque_compiler_type_t type,
bool &is_signed) {
if (type) {
const clang::EnumType *enum_type = llvm::dyn_cast<clang::EnumType>(
GetCanonicalQualType(type)->getCanonicalTypeInternal());
if (enum_type) {
IsIntegerType(enum_type->getDecl()->getIntegerType().getAsOpaquePtr(),
is_signed);
return true;
}
}
return false;
}
bool TypeSystemClang::IsScopedEnumerationType(
lldb::opaque_compiler_type_t type) {
if (type) {
const clang::EnumType *enum_type = llvm::dyn_cast<clang::EnumType>(
GetCanonicalQualType(type)->getCanonicalTypeInternal());
if (enum_type) {
return enum_type->isScopedEnumeralType();
}
}
return false;
}
bool TypeSystemClang::IsPointerType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
default:
break;
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
return true;
}
return false;
case clang::Type::ObjCObjectPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::ObjCObjectPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::BlockPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::BlockPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::Pointer:
if (pointee_type)
pointee_type->SetCompilerType(this,
llvm::cast<clang::PointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::MemberPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::MemberPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
default:
break;
}
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsPointerOrReferenceType(
lldb::opaque_compiler_type_t type, CompilerType *pointee_type) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
default:
break;
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
return true;
}
return false;
case clang::Type::ObjCObjectPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::ObjCObjectPointerType>(qual_type)
->getPointeeType().getAsOpaquePtr());
return true;
case clang::Type::BlockPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::BlockPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::Pointer:
if (pointee_type)
pointee_type->SetCompilerType(this,
llvm::cast<clang::PointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::MemberPointer:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::MemberPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
case clang::Type::LValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::LValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
return true;
case clang::Type::RValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::RValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
return true;
default:
break;
}
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type,
bool *is_rvalue) {
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::LValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::LValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
if (is_rvalue)
*is_rvalue = false;
return true;
case clang::Type::RValueReference:
if (pointee_type)
pointee_type->SetCompilerType(
this, llvm::cast<clang::RValueReferenceType>(qual_type)
->desugar()
.getAsOpaquePtr());
if (is_rvalue)
*is_rvalue = true;
return true;
default:
break;
}
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsFloatingPointType(lldb::opaque_compiler_type_t type,
uint32_t &count, bool &is_complex) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
if (const clang::BuiltinType *BT = llvm::dyn_cast<clang::BuiltinType>(
qual_type->getCanonicalTypeInternal())) {
clang::BuiltinType::Kind kind = BT->getKind();
if (kind >= clang::BuiltinType::Float &&
kind <= clang::BuiltinType::LongDouble) {
count = 1;
is_complex = false;
return true;
}
} else if (const clang::ComplexType *CT =
llvm::dyn_cast<clang::ComplexType>(
qual_type->getCanonicalTypeInternal())) {
if (IsFloatingPointType(CT->getElementType().getAsOpaquePtr(), count,
is_complex)) {
count = 2;
is_complex = true;
return true;
}
} else if (const clang::VectorType *VT = llvm::dyn_cast<clang::VectorType>(
qual_type->getCanonicalTypeInternal())) {
if (IsFloatingPointType(VT->getElementType().getAsOpaquePtr(), count,
is_complex)) {
count = VT->getNumElements();
is_complex = false;
return true;
}
}
}
count = 0;
is_complex = false;
return false;
}
bool TypeSystemClang::IsDefined(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetQualType(type));
const clang::TagType *tag_type =
llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr());
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (tag_decl)
return tag_decl->isCompleteDefinition();
return false;
} else {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl)
return class_interface_decl->getDefinition() != nullptr;
return false;
}
}
return true;
}
bool TypeSystemClang::IsObjCClassType(const CompilerType &type) {
if (ClangUtil::IsClangType(type)) {
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
const clang::ObjCObjectPointerType *obj_pointer_type =
llvm::dyn_cast<clang::ObjCObjectPointerType>(qual_type);
if (obj_pointer_type)
return obj_pointer_type->isObjCClassType();
}
return false;
}
bool TypeSystemClang::IsObjCObjectOrInterfaceType(const CompilerType &type) {
if (ClangUtil::IsClangType(type))
return ClangUtil::GetCanonicalQualType(type)->isObjCObjectOrInterfaceType();
return false;
}
bool TypeSystemClang::IsClassType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
return (type_class == clang::Type::Record);
}
bool TypeSystemClang::IsEnumType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
return (type_class == clang::Type::Enum);
}
bool TypeSystemClang::IsPolymorphicClass(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl) {
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl)
return cxx_record_decl->isPolymorphic();
}
}
break;
default:
break;
}
}
return false;
}
bool TypeSystemClang::IsPossibleDynamicType(lldb::opaque_compiler_type_t type,
CompilerType *dynamic_pointee_type,
bool check_cplusplus,
bool check_objc) {
clang::QualType pointee_qual_type;
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
bool success = false;
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
if (check_objc &&
llvm::cast<clang::BuiltinType>(qual_type)->getKind() ==
clang::BuiltinType::ObjCId) {
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(this, type);
return true;
}
break;
case clang::Type::ObjCObjectPointer:
if (check_objc) {
if (const auto *objc_pointee_type =
qual_type->getPointeeType().getTypePtrOrNull()) {
if (const auto *objc_object_type =
llvm::dyn_cast_or_null<clang::ObjCObjectType>(
objc_pointee_type)) {
if (objc_object_type->isObjCClass())
return false;
}
}
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, llvm::cast<clang::ObjCObjectPointerType>(qual_type)
->getPointeeType()
.getAsOpaquePtr());
return true;
}
break;
case clang::Type::Pointer:
pointee_qual_type =
llvm::cast<clang::PointerType>(qual_type)->getPointeeType();
success = true;
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
pointee_qual_type =
llvm::cast<clang::ReferenceType>(qual_type)->getPointeeType();
success = true;
break;
default:
break;
}
if (success) {
// Check to make sure what we are pointing to is a possible dynamic C++
// type. We currently accept any "void *" (in case we have a class that
// has been watered down to an opaque pointer) and virtual C++ classes.
const clang::Type::TypeClass pointee_type_class =
pointee_qual_type.getCanonicalType()->getTypeClass();
switch (pointee_type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(pointee_qual_type)->getKind()) {
case clang::BuiltinType::UnknownAny:
case clang::BuiltinType::Void:
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, pointee_qual_type.getAsOpaquePtr());
return true;
default:
break;
}
break;
case clang::Type::Record:
if (check_cplusplus) {
clang::CXXRecordDecl *cxx_record_decl =
pointee_qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
bool is_complete = cxx_record_decl->isCompleteDefinition();
if (is_complete)
success = cxx_record_decl->isDynamicClass();
else {
ClangASTMetadata *metadata = GetMetadata(cxx_record_decl);
if (metadata)
success = metadata->GetIsDynamicCXXType();
else {
is_complete = GetType(pointee_qual_type).GetCompleteType();
if (is_complete)
success = cxx_record_decl->isDynamicClass();
else
success = false;
}
}
if (success) {
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, pointee_qual_type.getAsOpaquePtr());
return true;
}
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (check_objc) {
if (dynamic_pointee_type)
dynamic_pointee_type->SetCompilerType(
this, pointee_qual_type.getAsOpaquePtr());
return true;
}
break;
default:
break;
}
}
}
if (dynamic_pointee_type)
dynamic_pointee_type->Clear();
return false;
}
bool TypeSystemClang::IsScalarType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
return (GetTypeInfo(type, nullptr) & eTypeIsScalar) != 0;
}
bool TypeSystemClang::IsTypedefType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
return RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef})
->getTypeClass() == clang::Type::Typedef;
}
bool TypeSystemClang::IsVoidType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
return GetCanonicalQualType(type)->isVoidType();
}
bool TypeSystemClang::CanPassInRegisters(const CompilerType &type) {
if (auto *record_decl =
TypeSystemClang::GetAsRecordDecl(type)) {
return record_decl->canPassInRegisters();
}
return false;
}
bool TypeSystemClang::SupportsLanguage(lldb::LanguageType language) {
return TypeSystemClangSupportsLanguage(language);
}
Optional<std::string>
TypeSystemClang::GetCXXClassName(const CompilerType &type) {
if (!type)
return llvm::None;
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
if (qual_type.isNull())
return llvm::None;
clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl();
if (!cxx_record_decl)
return llvm::None;
return std::string(cxx_record_decl->getIdentifier()->getNameStart());
}
bool TypeSystemClang::IsCXXClassType(const CompilerType &type) {
if (!type)
return false;
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
return !qual_type.isNull() && qual_type->getAsCXXRecordDecl() != nullptr;
}
bool TypeSystemClang::IsBeingDefined(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::TagType *tag_type = llvm::dyn_cast<clang::TagType>(qual_type);
if (tag_type)
return tag_type->isBeingDefined();
return false;
}
bool TypeSystemClang::IsObjCObjectPointerType(const CompilerType &type,
CompilerType *class_type_ptr) {
if (!ClangUtil::IsClangType(type))
return false;
clang::QualType qual_type(ClangUtil::GetCanonicalQualType(type));
if (!qual_type.isNull() && qual_type->isObjCObjectPointerType()) {
if (class_type_ptr) {
if (!qual_type->isObjCClassType() && !qual_type->isObjCIdType()) {
const clang::ObjCObjectPointerType *obj_pointer_type =
llvm::dyn_cast<clang::ObjCObjectPointerType>(qual_type);
if (obj_pointer_type == nullptr)
class_type_ptr->Clear();
else
class_type_ptr->SetCompilerType(
type.GetTypeSystem(),
clang::QualType(obj_pointer_type->getInterfaceType(), 0)
.getAsOpaquePtr());
}
}
return true;
}
if (class_type_ptr)
class_type_ptr->Clear();
return false;
}
// Type Completion
bool TypeSystemClang::GetCompleteType(lldb::opaque_compiler_type_t type) {
if (!type)
return false;
const bool allow_completion = true;
return GetCompleteQualType(&getASTContext(), GetQualType(type),
allow_completion);
}
ConstString TypeSystemClang::GetTypeName(lldb::opaque_compiler_type_t type) {
if (!type)
return ConstString();
clang::QualType qual_type(GetQualType(type));
// Remove certain type sugar from the name. Sugar such as elaborated types
// or template types which only serve to improve diagnostics shouldn't
// act as their own types from the user's perspective (e.g., a formatter
// shouldn't format a variable differently depending on how the user has
// specified the type; '::Type' and 'Type' should behave the same).
// Typedefs and atomic derived types are not removed as they are actually
// useful for identifying specific types.
qual_type = RemoveWrappingTypes(qual_type,
{clang::Type::Typedef, clang::Type::Atomic});
// For a typedef just return the qualified name.
if (const auto *typedef_type = qual_type->getAs<clang::TypedefType>()) {
const clang::TypedefNameDecl *typedef_decl = typedef_type->getDecl();
return ConstString(GetTypeNameForDecl(typedef_decl));
}
return ConstString(qual_type.getAsString(GetTypePrintingPolicy()));
}
ConstString
TypeSystemClang::GetDisplayTypeName(lldb::opaque_compiler_type_t type) {
if (!type)
return ConstString();
clang::QualType qual_type(GetQualType(type));
clang::PrintingPolicy printing_policy(getASTContext().getPrintingPolicy());
printing_policy.SuppressTagKeyword = true;
printing_policy.SuppressScope = false;
printing_policy.SuppressUnwrittenScope = true;
printing_policy.SuppressInlineNamespace = true;
return ConstString(qual_type.getAsString(printing_policy));
}
uint32_t
TypeSystemClang::GetTypeInfo(lldb::opaque_compiler_type_t type,
CompilerType *pointee_or_element_clang_type) {
if (!type)
return 0;
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->Clear();
clang::QualType qual_type =
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef});
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
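// Map the Clang type class onto the lldb eTypeIs* flag bits. Where it makes
// sense (pointers, references, arrays, enums, ...) the pointee or element
// type is also reported through pointee_or_element_clang_type.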
switch (type_class) {
case clang::Type::Attributed:
return GetTypeInfo(qual_type->castAs<clang::AttributedType>()
->getModifiedType()
.getAsOpaquePtr(),
pointee_or_element_clang_type);
case clang::Type::Builtin: {
const clang::BuiltinType *builtin_type =
llvm::cast<clang::BuiltinType>(qual_type->getCanonicalTypeInternal());
uint32_t builtin_type_flags = eTypeIsBuiltIn | eTypeHasValue;
switch (builtin_type->getKind()) {
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, getASTContext().ObjCBuiltinClassTy.getAsOpaquePtr());
builtin_type_flags |= eTypeIsPointer | eTypeIsObjC;
break;
case clang::BuiltinType::ObjCSel:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, getASTContext().CharTy.getAsOpaquePtr());
builtin_type_flags |= eTypeIsPointer | eTypeIsObjC;
break;
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
builtin_type_flags |= eTypeIsScalar;
if (builtin_type->isInteger()) {
builtin_type_flags |= eTypeIsInteger;
if (builtin_type->isSignedInteger())
builtin_type_flags |= eTypeIsSigned;
} else if (builtin_type->isFloatingPoint())
builtin_type_flags |= eTypeIsFloat;
break;
default:
break;
}
return builtin_type_flags;
}
case clang::Type::BlockPointer:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, qual_type->getPointeeType().getAsOpaquePtr());
return eTypeIsPointer | eTypeHasChildren | eTypeIsBlock;
case clang::Type::Complex: {
uint32_t complex_type_flags =
eTypeIsBuiltIn | eTypeHasValue | eTypeIsComplex;
const clang::ComplexType *complex_type = llvm::dyn_cast<clang::ComplexType>(
qual_type->getCanonicalTypeInternal());
if (complex_type) {
clang::QualType complex_element_type(complex_type->getElementType());
if (complex_element_type->isIntegerType())
complex_type_flags |= eTypeIsInteger;
else if (complex_element_type->isFloatingType())
complex_type_flags |= eTypeIsFloat;
}
return complex_type_flags;
} break;
case clang::Type::ConstantArray:
case clang::Type::DependentSizedArray:
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, llvm::cast<clang::ArrayType>(qual_type.getTypePtr())
->getElementType()
.getAsOpaquePtr());
return eTypeHasChildren | eTypeIsArray;
case clang::Type::DependentName:
return 0;
case clang::Type::DependentSizedExtVector:
return eTypeHasChildren | eTypeIsVector;
case clang::Type::DependentTemplateSpecialization:
return eTypeIsTemplate;
case clang::Type::Enum:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, llvm::cast<clang::EnumType>(qual_type)
->getDecl()
->getIntegerType()
.getAsOpaquePtr());
return eTypeIsEnumeration | eTypeHasValue;
case clang::Type::FunctionProto:
return eTypeIsFuncPrototype | eTypeHasValue;
case clang::Type::FunctionNoProto:
return eTypeIsFuncPrototype | eTypeHasValue;
case clang::Type::InjectedClassName:
return 0;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, llvm::cast<clang::ReferenceType>(qual_type.getTypePtr())
->getPointeeType()
.getAsOpaquePtr());
return eTypeHasChildren | eTypeIsReference | eTypeHasValue;
case clang::Type::MemberPointer:
return eTypeIsPointer | eTypeIsMember | eTypeHasValue;
case clang::Type::ObjCObjectPointer:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, qual_type->getPointeeType().getAsOpaquePtr());
return eTypeHasChildren | eTypeIsObjC | eTypeIsClass | eTypeIsPointer |
eTypeHasValue;
case clang::Type::ObjCObject:
return eTypeHasChildren | eTypeIsObjC | eTypeIsClass;
case clang::Type::ObjCInterface:
return eTypeHasChildren | eTypeIsObjC | eTypeIsClass;
case clang::Type::Pointer:
if (pointee_or_element_clang_type)
pointee_or_element_clang_type->SetCompilerType(
this, qual_type->getPointeeType().getAsOpaquePtr());
return eTypeHasChildren | eTypeIsPointer | eTypeHasValue;
case clang::Type::Record:
if (qual_type->getAsCXXRecordDecl())
return eTypeHasChildren | eTypeIsClass | eTypeIsCPlusPlus;
else
return eTypeHasChildren | eTypeIsStructUnion;
break;
case clang::Type::SubstTemplateTypeParm:
return eTypeIsTemplate;
case clang::Type::TemplateTypeParm:
return eTypeIsTemplate;
case clang::Type::TemplateSpecialization:
return eTypeIsTemplate;
case clang::Type::Typedef:
return eTypeIsTypedef | GetType(llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType())
.GetTypeInfo(pointee_or_element_clang_type);
case clang::Type::UnresolvedUsing:
return 0;
case clang::Type::ExtVector:
case clang::Type::Vector: {
uint32_t vector_type_flags = eTypeHasChildren | eTypeIsVector;
const clang::VectorType *vector_type = llvm::dyn_cast<clang::VectorType>(
qual_type->getCanonicalTypeInternal());
if (vector_type) {
if (vector_type->isIntegerType())
vector_type_flags |= eTypeIsInteger;
else if (vector_type->isFloatingType())
vector_type_flags |= eTypeIsFloat;
}
return vector_type_flags;
}
default:
return 0;
}
return 0;
}
lldb::LanguageType
TypeSystemClang::GetMinimumLanguage(lldb::opaque_compiler_type_t type) {
if (!type)
return lldb::eLanguageTypeC;
// If the type is a reference, then resolve it to what it refers to first:
clang::QualType qual_type(GetCanonicalQualType(type).getNonReferenceType());
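// For pointer types, infer the language from the pointee: Objective-C object
// pointers and pointers to C++ record types are the strongest hints we have.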
if (qual_type->isAnyPointerType()) {
if (qual_type->isObjCObjectPointerType())
return lldb::eLanguageTypeObjC;
if (qual_type->getPointeeCXXRecordDecl())
return lldb::eLanguageTypeC_plus_plus;
clang::QualType pointee_type(qual_type->getPointeeType());
if (pointee_type->getPointeeCXXRecordDecl())
return lldb::eLanguageTypeC_plus_plus;
if (pointee_type->isObjCObjectOrInterfaceType())
return lldb::eLanguageTypeObjC;
if (pointee_type->isObjCClassType())
return lldb::eLanguageTypeObjC;
if (pointee_type.getTypePtr() ==
getASTContext().ObjCBuiltinIdTy.getTypePtr())
return lldb::eLanguageTypeObjC;
} else {
if (qual_type->isObjCObjectOrInterfaceType())
return lldb::eLanguageTypeObjC;
if (qual_type->getAsCXXRecordDecl())
return lldb::eLanguageTypeC_plus_plus;
switch (qual_type->getTypeClass()) {
default:
break;
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
default:
case clang::BuiltinType::Void:
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
break;
case clang::BuiltinType::NullPtr:
return eLanguageTypeC_plus_plus;
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
case clang::BuiltinType::ObjCSel:
return eLanguageTypeObjC;
case clang::BuiltinType::Dependent:
case clang::BuiltinType::Overload:
case clang::BuiltinType::BoundMember:
case clang::BuiltinType::UnknownAny:
break;
}
break;
case clang::Type::Typedef:
return GetType(llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType())
.GetMinimumLanguage();
}
}
return lldb::eLanguageTypeC;
}
lldb::TypeClass
TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) {
if (!type)
return lldb::eTypeClassInvalid;
clang::QualType qual_type =
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef});
switch (qual_type->getTypeClass()) {
case clang::Type::Atomic:
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
llvm_unreachable("Handled in RemoveWrappingTypes!");
case clang::Type::UnaryTransform:
break;
case clang::Type::FunctionNoProto:
return lldb::eTypeClassFunction;
case clang::Type::FunctionProto:
return lldb::eTypeClassFunction;
case clang::Type::IncompleteArray:
return lldb::eTypeClassArray;
case clang::Type::VariableArray:
return lldb::eTypeClassArray;
case clang::Type::ConstantArray:
return lldb::eTypeClassArray;
case clang::Type::DependentSizedArray:
return lldb::eTypeClassArray;
case clang::Type::DependentSizedExtVector:
return lldb::eTypeClassVector;
case clang::Type::DependentVector:
return lldb::eTypeClassVector;
case clang::Type::ExtVector:
return lldb::eTypeClassVector;
case clang::Type::Vector:
return lldb::eTypeClassVector;
case clang::Type::Builtin:
// Ext-Int is just an integer type.
case clang::Type::BitInt:
case clang::Type::DependentBitInt:
return lldb::eTypeClassBuiltin;
case clang::Type::ObjCObjectPointer:
return lldb::eTypeClassObjCObjectPointer;
case clang::Type::BlockPointer:
return lldb::eTypeClassBlockPointer;
case clang::Type::Pointer:
return lldb::eTypeClassPointer;
case clang::Type::LValueReference:
return lldb::eTypeClassReference;
case clang::Type::RValueReference:
return lldb::eTypeClassReference;
case clang::Type::MemberPointer:
return lldb::eTypeClassMemberPointer;
case clang::Type::Complex:
if (qual_type->isComplexType())
return lldb::eTypeClassComplexFloat;
else
return lldb::eTypeClassComplexInteger;
case clang::Type::ObjCObject:
return lldb::eTypeClassObjCObject;
case clang::Type::ObjCInterface:
return lldb::eTypeClassObjCInterface;
case clang::Type::Record: {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl->isUnion())
return lldb::eTypeClassUnion;
else if (record_decl->isStruct())
return lldb::eTypeClassStruct;
else
return lldb::eTypeClassClass;
} break;
case clang::Type::Enum:
return lldb::eTypeClassEnumeration;
case clang::Type::Typedef:
return lldb::eTypeClassTypedef;
case clang::Type::UnresolvedUsing:
break;
case clang::Type::Attributed:
case clang::Type::BTFTagAttributed:
break;
case clang::Type::TemplateTypeParm:
break;
case clang::Type::SubstTemplateTypeParm:
break;
case clang::Type::SubstTemplateTypeParmPack:
break;
case clang::Type::InjectedClassName:
break;
case clang::Type::DependentName:
break;
case clang::Type::DependentTemplateSpecialization:
break;
case clang::Type::PackExpansion:
break;
case clang::Type::TemplateSpecialization:
break;
case clang::Type::DeducedTemplateSpecialization:
break;
case clang::Type::Pipe:
break;
// pointer type decayed from an array or function type.
case clang::Type::Decayed:
break;
case clang::Type::Adjusted:
break;
case clang::Type::ObjCTypeParam:
break;
case clang::Type::DependentAddressSpace:
break;
case clang::Type::MacroQualified:
break;
// Matrix types that we're not sure how to display at the moment.
case clang::Type::ConstantMatrix:
case clang::Type::DependentSizedMatrix:
break;
}
// We don't know how to display this type...
return lldb::eTypeClassOther;
}
unsigned TypeSystemClang::GetTypeQualifiers(lldb::opaque_compiler_type_t type) {
if (type)
return GetQualType(type).getQualifiers().getCVRQualifiers();
return 0;
}
// Creating related types
CompilerType
TypeSystemClang::GetArrayElementType(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::Type *array_eletype =
qual_type.getTypePtr()->getArrayElementTypeNoTypeQual();
if (!array_eletype)
return CompilerType();
return GetType(clang::QualType(array_eletype, 0));
}
return CompilerType();
}
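// A size of 0 produces an incomplete array type; any other size produces a
// constant array type with that many elements.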
CompilerType TypeSystemClang::GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size) {
if (type) {
clang::QualType qual_type(GetCanonicalQualType(type));
clang::ASTContext &ast_ctx = getASTContext();
if (size != 0)
return GetType(ast_ctx.getConstantArrayType(
qual_type, llvm::APInt(64, size), nullptr,
clang::ArrayType::ArraySizeModifier::Normal, 0));
else
return GetType(ast_ctx.getIncompleteArrayType(
qual_type, clang::ArrayType::ArraySizeModifier::Normal, 0));
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetCanonicalType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(GetCanonicalQualType(type));
return CompilerType();
}
static clang::QualType GetFullyUnqualifiedType_Impl(clang::ASTContext *ast,
clang::QualType qual_type) {
if (qual_type->isPointerType())
qual_type = ast->getPointerType(
GetFullyUnqualifiedType_Impl(ast, qual_type->getPointeeType()));
else if (const ConstantArrayType *arr =
ast->getAsConstantArrayType(qual_type)) {
qual_type = ast->getConstantArrayType(
GetFullyUnqualifiedType_Impl(ast, arr->getElementType()),
arr->getSize(), arr->getSizeExpr(), arr->getSizeModifier(),
arr->getIndexTypeQualifiers().getAsOpaqueValue());
} else
qual_type = qual_type.getUnqualifiedType();
qual_type.removeLocalConst();
qual_type.removeLocalRestrict();
qual_type.removeLocalVolatile();
return qual_type;
}
CompilerType
TypeSystemClang::GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(
GetFullyUnqualifiedType_Impl(&getASTContext(), GetQualType(type)));
return CompilerType();
}
CompilerType
TypeSystemClang::GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) {
if (type)
return GetEnumerationIntegerType(GetType(GetCanonicalQualType(type)));
return CompilerType();
}
int TypeSystemClang::GetFunctionArgumentCount(
lldb::opaque_compiler_type_t type) {
if (type) {
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(GetCanonicalQualType(type));
if (func)
return func->getNumParams();
}
return -1;
}
CompilerType TypeSystemClang::GetFunctionArgumentTypeAtIndex(
lldb::opaque_compiler_type_t type, size_t idx) {
if (type) {
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(GetQualType(type));
if (func) {
const uint32_t num_args = func->getNumParams();
if (idx < num_args)
return GetType(func->getParamType(idx));
}
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetFunctionReturnType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::FunctionProtoType *func =
llvm::dyn_cast<clang::FunctionProtoType>(qual_type.getTypePtr());
if (func)
return GetType(func->getReturnType());
}
return CompilerType();
}
size_t
TypeSystemClang::GetNumMemberFunctions(lldb::opaque_compiler_type_t type) {
size_t num_functions = 0;
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Record:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl)
num_functions = std::distance(cxx_record_decl->method_begin(),
cxx_record_decl->method_end());
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
num_functions = std::distance(class_interface_decl->meth_begin(),
class_interface_decl->meth_end());
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl)
num_functions = std::distance(class_interface_decl->meth_begin(),
class_interface_decl->meth_end());
}
}
break;
default:
break;
}
}
return num_functions;
}
TypeMemberFunctionImpl
TypeSystemClang::GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) {
std::string name;
MemberFunctionKind kind(MemberFunctionKind::eMemberFunctionKindUnknown);
CompilerType clang_type;
CompilerDecl clang_decl;
if (type) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Record:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
auto method_iter = cxx_record_decl->method_begin();
auto method_end = cxx_record_decl->method_end();
if (idx <
static_cast<size_t>(std::distance(method_iter, method_end))) {
std::advance(method_iter, idx);
clang::CXXMethodDecl *cxx_method_decl =
method_iter->getCanonicalDecl();
if (cxx_method_decl) {
name = cxx_method_decl->getDeclName().getAsString();
if (cxx_method_decl->isStatic())
kind = lldb::eMemberFunctionKindStaticMethod;
else if (llvm::isa<clang::CXXConstructorDecl>(cxx_method_decl))
kind = lldb::eMemberFunctionKindConstructor;
else if (llvm::isa<clang::CXXDestructorDecl>(cxx_method_decl))
kind = lldb::eMemberFunctionKindDestructor;
else
kind = lldb::eMemberFunctionKindInstanceMethod;
clang_type = GetType(cxx_method_decl->getType());
clang_decl = GetCompilerDecl(cxx_method_decl);
}
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
auto method_iter = class_interface_decl->meth_begin();
auto method_end = class_interface_decl->meth_end();
if (idx <
static_cast<size_t>(std::distance(method_iter, method_end))) {
std::advance(method_iter, idx);
clang::ObjCMethodDecl *objc_method_decl =
method_iter->getCanonicalDecl();
if (objc_method_decl) {
clang_decl = GetCompilerDecl(objc_method_decl);
name = objc_method_decl->getSelector().getAsString();
if (objc_method_decl->isClassMethod())
kind = lldb::eMemberFunctionKindStaticMethod;
else
kind = lldb::eMemberFunctionKindInstanceMethod;
}
}
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
auto method_iter = class_interface_decl->meth_begin();
auto method_end = class_interface_decl->meth_end();
if (idx <
static_cast<size_t>(std::distance(method_iter, method_end))) {
std::advance(method_iter, idx);
clang::ObjCMethodDecl *objc_method_decl =
method_iter->getCanonicalDecl();
if (objc_method_decl) {
clang_decl = GetCompilerDecl(objc_method_decl);
name = objc_method_decl->getSelector().getAsString();
if (objc_method_decl->isClassMethod())
kind = lldb::eMemberFunctionKindStaticMethod;
else
kind = lldb::eMemberFunctionKindInstanceMethod;
}
}
}
}
}
break;
default:
break;
}
}
if (kind == eMemberFunctionKindUnknown)
return TypeMemberFunctionImpl();
else
return TypeMemberFunctionImpl(clang_type, clang_decl, name, kind);
}
CompilerType
TypeSystemClang::GetNonReferenceType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(GetQualType(type).getNonReferenceType());
return CompilerType();
}
CompilerType
TypeSystemClang::GetPointeeType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
return GetType(qual_type.getTypePtr()->getPointeeType());
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetPointerType(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
switch (qual_type.getDesugaredType(getASTContext())->getTypeClass()) {
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
return GetType(getASTContext().getObjCObjectPointerType(qual_type));
default:
return GetType(getASTContext().getPointerType(qual_type));
}
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetLValueReferenceType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(getASTContext().getLValueReferenceType(GetQualType(type)));
else
return CompilerType();
}
CompilerType
TypeSystemClang::GetRValueReferenceType(lldb::opaque_compiler_type_t type) {
if (type)
return GetType(getASTContext().getRValueReferenceType(GetQualType(type)));
else
return CompilerType();
}
CompilerType TypeSystemClang::GetAtomicType(lldb::opaque_compiler_type_t type) {
if (!type)
return CompilerType();
return GetType(getASTContext().getAtomicType(GetQualType(type)));
}
CompilerType
TypeSystemClang::AddConstModifier(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType result(GetQualType(type));
result.addConst();
return GetType(result);
}
return CompilerType();
}
CompilerType
TypeSystemClang::AddVolatileModifier(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType result(GetQualType(type));
result.addVolatile();
return GetType(result);
}
return CompilerType();
}
CompilerType
TypeSystemClang::AddRestrictModifier(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType result(GetQualType(type));
result.addRestrict();
return GetType(result);
}
return CompilerType();
}
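// Example usage (a sketch with a hypothetical alias name; passing an empty
// CompilerDeclContext makes the typedef land in the translation unit):
//   CompilerType alias = ts.CreateTypedef(int_type.GetOpaqueQualType(),
//                                         "MyAlias", CompilerDeclContext(),
//                                         /*payload=*/0);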
CompilerType TypeSystemClang::CreateTypedef(
lldb::opaque_compiler_type_t type, const char *typedef_name,
const CompilerDeclContext &compiler_decl_ctx, uint32_t payload) {
if (type && typedef_name && typedef_name[0]) {
clang::ASTContext &clang_ast = getASTContext();
clang::QualType qual_type(GetQualType(type));
clang::DeclContext *decl_ctx =
TypeSystemClang::DeclContextGetAsDeclContext(compiler_decl_ctx);
if (!decl_ctx)
decl_ctx = getASTContext().getTranslationUnitDecl();
clang::TypedefDecl *decl =
clang::TypedefDecl::CreateDeserialized(clang_ast, 0);
decl->setDeclContext(decl_ctx);
decl->setDeclName(&clang_ast.Idents.get(typedef_name));
decl->setTypeSourceInfo(clang_ast.getTrivialTypeSourceInfo(qual_type));
decl_ctx->addDecl(decl);
SetOwningModule(decl, TypePayloadClang(payload).GetOwningModule());
clang::TagDecl *tdecl = nullptr;
if (!qual_type.isNull()) {
if (const clang::RecordType *rt = qual_type->getAs<clang::RecordType>())
tdecl = rt->getDecl();
if (const clang::EnumType *et = qual_type->getAs<clang::EnumType>())
tdecl = et->getDecl();
}
// Check whether this declaration is an anonymous struct, union, or enum,
// hidden behind a typedef. If so, we try to check whether we have a
// typedef tag to attach to the original record declaration.
if (tdecl && !tdecl->getIdentifier() && !tdecl->getTypedefNameForAnonDecl())
tdecl->setTypedefNameForAnonDecl(decl);
decl->setAccess(clang::AS_public); // TODO respect proper access specifier
// Get a uniqued clang::QualType for the typedef decl type
return GetType(clang_ast.getTypedefType(decl));
}
return CompilerType();
}
CompilerType
TypeSystemClang::GetTypedefedType(lldb::opaque_compiler_type_t type) {
if (type) {
const clang::TypedefType *typedef_type = llvm::dyn_cast<clang::TypedefType>(
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef}));
if (typedef_type)
return GetType(typedef_type->getDecl()->getUnderlyingType());
}
return CompilerType();
}
// Create related types using the current type's AST
CompilerType TypeSystemClang::GetBasicTypeFromAST(lldb::BasicType basic_type) {
return TypeSystemClang::GetBasicType(basic_type);
}
// Exploring the type
const llvm::fltSemantics &
TypeSystemClang::GetFloatTypeSemantics(size_t byte_size) {
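// Match the requested byte size against this AST's float, double, long
// double and half types and return the corresponding APFloat semantics;
// fall back to Bogus() if nothing matches.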
clang::ASTContext &ast = getASTContext();
const size_t bit_size = byte_size * 8;
if (bit_size == ast.getTypeSize(ast.FloatTy))
return ast.getFloatTypeSemantics(ast.FloatTy);
else if (bit_size == ast.getTypeSize(ast.DoubleTy))
return ast.getFloatTypeSemantics(ast.DoubleTy);
else if (bit_size == ast.getTypeSize(ast.LongDoubleTy))
return ast.getFloatTypeSemantics(ast.LongDoubleTy);
else if (bit_size == ast.getTypeSize(ast.HalfTy))
return ast.getFloatTypeSemantics(ast.HalfTy);
return llvm::APFloatBase::Bogus();
}
Optional<uint64_t>
TypeSystemClang::GetBitSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (GetCompleteType(type)) {
clang::QualType qual_type(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type))
return getASTContext().getTypeSize(qual_type);
else
return None;
break;
case clang::Type::ObjCInterface:
case clang::Type::ObjCObject: {
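// The size of an Objective-C object is only known to the Objective-C
// language runtime, so ask the process' runtime when we have one.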
ExecutionContext exe_ctx(exe_scope);
Process *process = exe_ctx.GetProcessPtr();
if (process) {
ObjCLanguageRuntime *objc_runtime = ObjCLanguageRuntime::Get(*process);
if (objc_runtime) {
uint64_t bit_size = 0;
if (objc_runtime->GetTypeBitSize(GetType(qual_type), bit_size))
return bit_size;
}
} else {
static bool g_printed = false;
if (!g_printed) {
StreamString s;
DumpTypeDescription(type, &s);
llvm::outs() << "warning: trying to determine the size of type ";
llvm::outs() << s.GetString() << "\n";
llvm::outs() << "without a valid ExecutionContext. this is not "
"reliable. please file a bug against LLDB.\n";
llvm::outs() << "backtrace:\n";
llvm::sys::PrintStackTrace(llvm::outs());
llvm::outs() << "\n";
g_printed = true;
}
}
}
LLVM_FALLTHROUGH;
default:
const uint32_t bit_size = getASTContext().getTypeSize(qual_type);
if (bit_size == 0) {
if (qual_type->isIncompleteArrayType())
return getASTContext().getTypeSize(
qual_type->getArrayElementTypeNoTypeQual()
->getCanonicalTypeUnqualified());
}
if (qual_type->isObjCObjectOrInterfaceType())
return bit_size +
getASTContext().getTypeSize(getASTContext().ObjCBuiltinClassTy);
// Function types actually have a size of 0, that's not an error.
if (qual_type->isFunctionProtoType())
return bit_size;
if (bit_size)
return bit_size;
}
}
return None;
}
llvm::Optional<size_t>
TypeSystemClang::GetTypeBitAlign(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (GetCompleteType(type))
return getASTContext().getTypeAlign(GetQualType(type));
return {};
}
lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type,
uint64_t &count) {
if (!type)
return lldb::eEncodingInvalid;
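// count reports how many scalar values make up this type (2 for complex
// types, 1 for everything else we understand).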
count = 1;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Atomic:
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::Typedef:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
llvm_unreachable("Handled in RemoveWrappingTypes!");
case clang::Type::UnaryTransform:
break;
case clang::Type::FunctionNoProto:
case clang::Type::FunctionProto:
break;
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
break;
case clang::Type::ConstantArray:
break;
case clang::Type::DependentVector:
case clang::Type::ExtVector:
case clang::Type::Vector:
// TODO: Set this to more than one???
break;
case clang::Type::BitInt:
case clang::Type::DependentBitInt:
return qual_type->isUnsignedIntegerType() ? lldb::eEncodingUint
: lldb::eEncodingSint;
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::Void:
break;
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
return lldb::eEncodingSint;
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char8:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
return lldb::eEncodingUint;
// Fixed point types. Note that they are currently ignored.
case clang::BuiltinType::ShortAccum:
case clang::BuiltinType::Accum:
case clang::BuiltinType::LongAccum:
case clang::BuiltinType::UShortAccum:
case clang::BuiltinType::UAccum:
case clang::BuiltinType::ULongAccum:
case clang::BuiltinType::ShortFract:
case clang::BuiltinType::Fract:
case clang::BuiltinType::LongFract:
case clang::BuiltinType::UShortFract:
case clang::BuiltinType::UFract:
case clang::BuiltinType::ULongFract:
case clang::BuiltinType::SatShortAccum:
case clang::BuiltinType::SatAccum:
case clang::BuiltinType::SatLongAccum:
case clang::BuiltinType::SatUShortAccum:
case clang::BuiltinType::SatUAccum:
case clang::BuiltinType::SatULongAccum:
case clang::BuiltinType::SatShortFract:
case clang::BuiltinType::SatFract:
case clang::BuiltinType::SatLongFract:
case clang::BuiltinType::SatUShortFract:
case clang::BuiltinType::SatUFract:
case clang::BuiltinType::SatULongFract:
break;
case clang::BuiltinType::Half:
case clang::BuiltinType::Float:
case clang::BuiltinType::Float16:
case clang::BuiltinType::Float128:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
case clang::BuiltinType::BFloat16:
case clang::BuiltinType::Ibm128:
return lldb::eEncodingIEEE754;
case clang::BuiltinType::ObjCClass:
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCSel:
return lldb::eEncodingUint;
case clang::BuiltinType::NullPtr:
return lldb::eEncodingUint;
case clang::BuiltinType::Kind::ARCUnbridgedCast:
case clang::BuiltinType::Kind::BoundMember:
case clang::BuiltinType::Kind::BuiltinFn:
case clang::BuiltinType::Kind::Dependent:
case clang::BuiltinType::Kind::OCLClkEvent:
case clang::BuiltinType::Kind::OCLEvent:
case clang::BuiltinType::Kind::OCLImage1dRO:
case clang::BuiltinType::Kind::OCLImage1dWO:
case clang::BuiltinType::Kind::OCLImage1dRW:
case clang::BuiltinType::Kind::OCLImage1dArrayRO:
case clang::BuiltinType::Kind::OCLImage1dArrayWO:
case clang::BuiltinType::Kind::OCLImage1dArrayRW:
case clang::BuiltinType::Kind::OCLImage1dBufferRO:
case clang::BuiltinType::Kind::OCLImage1dBufferWO:
case clang::BuiltinType::Kind::OCLImage1dBufferRW:
case clang::BuiltinType::Kind::OCLImage2dRO:
case clang::BuiltinType::Kind::OCLImage2dWO:
case clang::BuiltinType::Kind::OCLImage2dRW:
case clang::BuiltinType::Kind::OCLImage2dArrayRO:
case clang::BuiltinType::Kind::OCLImage2dArrayWO:
case clang::BuiltinType::Kind::OCLImage2dArrayRW:
case clang::BuiltinType::Kind::OCLImage2dArrayDepthRO:
case clang::BuiltinType::Kind::OCLImage2dArrayDepthWO:
case clang::BuiltinType::Kind::OCLImage2dArrayDepthRW:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAARO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAAWO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAARW:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAADepthRO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAADepthWO:
case clang::BuiltinType::Kind::OCLImage2dArrayMSAADepthRW:
case clang::BuiltinType::Kind::OCLImage2dDepthRO:
case clang::BuiltinType::Kind::OCLImage2dDepthWO:
case clang::BuiltinType::Kind::OCLImage2dDepthRW:
case clang::BuiltinType::Kind::OCLImage2dMSAARO:
case clang::BuiltinType::Kind::OCLImage2dMSAAWO:
case clang::BuiltinType::Kind::OCLImage2dMSAARW:
case clang::BuiltinType::Kind::OCLImage2dMSAADepthRO:
case clang::BuiltinType::Kind::OCLImage2dMSAADepthWO:
case clang::BuiltinType::Kind::OCLImage2dMSAADepthRW:
case clang::BuiltinType::Kind::OCLImage3dRO:
case clang::BuiltinType::Kind::OCLImage3dWO:
case clang::BuiltinType::Kind::OCLImage3dRW:
case clang::BuiltinType::Kind::OCLQueue:
case clang::BuiltinType::Kind::OCLReserveID:
case clang::BuiltinType::Kind::OCLSampler:
case clang::BuiltinType::Kind::OMPArraySection:
case clang::BuiltinType::Kind::OMPArrayShaping:
case clang::BuiltinType::Kind::OMPIterator:
case clang::BuiltinType::Kind::Overload:
case clang::BuiltinType::Kind::PseudoObject:
case clang::BuiltinType::Kind::UnknownAny:
break;
case clang::BuiltinType::OCLIntelSubgroupAVCMcePayload:
case clang::BuiltinType::OCLIntelSubgroupAVCImePayload:
case clang::BuiltinType::OCLIntelSubgroupAVCRefPayload:
case clang::BuiltinType::OCLIntelSubgroupAVCSicPayload:
case clang::BuiltinType::OCLIntelSubgroupAVCMceResult:
case clang::BuiltinType::OCLIntelSubgroupAVCImeResult:
case clang::BuiltinType::OCLIntelSubgroupAVCRefResult:
case clang::BuiltinType::OCLIntelSubgroupAVCSicResult:
case clang::BuiltinType::OCLIntelSubgroupAVCImeResultSingleRefStreamout:
case clang::BuiltinType::OCLIntelSubgroupAVCImeResultDualRefStreamout:
case clang::BuiltinType::OCLIntelSubgroupAVCImeSingleRefStreamin:
case clang::BuiltinType::OCLIntelSubgroupAVCImeDualRefStreamin:
break;
// PowerPC -- Matrix Multiply Assist
case clang::BuiltinType::VectorPair:
case clang::BuiltinType::VectorQuad:
break;
// ARM -- Scalable Vector Extension
case clang::BuiltinType::SveBool:
case clang::BuiltinType::SveInt8:
case clang::BuiltinType::SveInt8x2:
case clang::BuiltinType::SveInt8x3:
case clang::BuiltinType::SveInt8x4:
case clang::BuiltinType::SveInt16:
case clang::BuiltinType::SveInt16x2:
case clang::BuiltinType::SveInt16x3:
case clang::BuiltinType::SveInt16x4:
case clang::BuiltinType::SveInt32:
case clang::BuiltinType::SveInt32x2:
case clang::BuiltinType::SveInt32x3:
case clang::BuiltinType::SveInt32x4:
case clang::BuiltinType::SveInt64:
case clang::BuiltinType::SveInt64x2:
case clang::BuiltinType::SveInt64x3:
case clang::BuiltinType::SveInt64x4:
case clang::BuiltinType::SveUint8:
case clang::BuiltinType::SveUint8x2:
case clang::BuiltinType::SveUint8x3:
case clang::BuiltinType::SveUint8x4:
case clang::BuiltinType::SveUint16:
case clang::BuiltinType::SveUint16x2:
case clang::BuiltinType::SveUint16x3:
case clang::BuiltinType::SveUint16x4:
case clang::BuiltinType::SveUint32:
case clang::BuiltinType::SveUint32x2:
case clang::BuiltinType::SveUint32x3:
case clang::BuiltinType::SveUint32x4:
case clang::BuiltinType::SveUint64:
case clang::BuiltinType::SveUint64x2:
case clang::BuiltinType::SveUint64x3:
case clang::BuiltinType::SveUint64x4:
case clang::BuiltinType::SveFloat16:
case clang::BuiltinType::SveBFloat16:
case clang::BuiltinType::SveBFloat16x2:
case clang::BuiltinType::SveBFloat16x3:
case clang::BuiltinType::SveBFloat16x4:
case clang::BuiltinType::SveFloat16x2:
case clang::BuiltinType::SveFloat16x3:
case clang::BuiltinType::SveFloat16x4:
case clang::BuiltinType::SveFloat32:
case clang::BuiltinType::SveFloat32x2:
case clang::BuiltinType::SveFloat32x3:
case clang::BuiltinType::SveFloat32x4:
case clang::BuiltinType::SveFloat64:
case clang::BuiltinType::SveFloat64x2:
case clang::BuiltinType::SveFloat64x3:
case clang::BuiltinType::SveFloat64x4:
break;
// RISC-V V builtin types.
case clang::BuiltinType::RvvInt8mf8:
case clang::BuiltinType::RvvInt8mf4:
case clang::BuiltinType::RvvInt8mf2:
case clang::BuiltinType::RvvInt8m1:
case clang::BuiltinType::RvvInt8m2:
case clang::BuiltinType::RvvInt8m4:
case clang::BuiltinType::RvvInt8m8:
case clang::BuiltinType::RvvUint8mf8:
case clang::BuiltinType::RvvUint8mf4:
case clang::BuiltinType::RvvUint8mf2:
case clang::BuiltinType::RvvUint8m1:
case clang::BuiltinType::RvvUint8m2:
case clang::BuiltinType::RvvUint8m4:
case clang::BuiltinType::RvvUint8m8:
case clang::BuiltinType::RvvInt16mf4:
case clang::BuiltinType::RvvInt16mf2:
case clang::BuiltinType::RvvInt16m1:
case clang::BuiltinType::RvvInt16m2:
case clang::BuiltinType::RvvInt16m4:
case clang::BuiltinType::RvvInt16m8:
case clang::BuiltinType::RvvUint16mf4:
case clang::BuiltinType::RvvUint16mf2:
case clang::BuiltinType::RvvUint16m1:
case clang::BuiltinType::RvvUint16m2:
case clang::BuiltinType::RvvUint16m4:
case clang::BuiltinType::RvvUint16m8:
case clang::BuiltinType::RvvInt32mf2:
case clang::BuiltinType::RvvInt32m1:
case clang::BuiltinType::RvvInt32m2:
case clang::BuiltinType::RvvInt32m4:
case clang::BuiltinType::RvvInt32m8:
case clang::BuiltinType::RvvUint32mf2:
case clang::BuiltinType::RvvUint32m1:
case clang::BuiltinType::RvvUint32m2:
case clang::BuiltinType::RvvUint32m4:
case clang::BuiltinType::RvvUint32m8:
case clang::BuiltinType::RvvInt64m1:
case clang::BuiltinType::RvvInt64m2:
case clang::BuiltinType::RvvInt64m4:
case clang::BuiltinType::RvvInt64m8:
case clang::BuiltinType::RvvUint64m1:
case clang::BuiltinType::RvvUint64m2:
case clang::BuiltinType::RvvUint64m4:
case clang::BuiltinType::RvvUint64m8:
case clang::BuiltinType::RvvFloat16mf4:
case clang::BuiltinType::RvvFloat16mf2:
case clang::BuiltinType::RvvFloat16m1:
case clang::BuiltinType::RvvFloat16m2:
case clang::BuiltinType::RvvFloat16m4:
case clang::BuiltinType::RvvFloat16m8:
case clang::BuiltinType::RvvFloat32mf2:
case clang::BuiltinType::RvvFloat32m1:
case clang::BuiltinType::RvvFloat32m2:
case clang::BuiltinType::RvvFloat32m4:
case clang::BuiltinType::RvvFloat32m8:
case clang::BuiltinType::RvvFloat64m1:
case clang::BuiltinType::RvvFloat64m2:
case clang::BuiltinType::RvvFloat64m4:
case clang::BuiltinType::RvvFloat64m8:
case clang::BuiltinType::RvvBool1:
case clang::BuiltinType::RvvBool2:
case clang::BuiltinType::RvvBool4:
case clang::BuiltinType::RvvBool8:
case clang::BuiltinType::RvvBool16:
case clang::BuiltinType::RvvBool32:
case clang::BuiltinType::RvvBool64:
break;
case clang::BuiltinType::IncompleteMatrixIdx:
break;
}
break;
// All pointer types are represented as unsigned integer encodings. We may
// need to add an eEncodingPointer if we ever need to know the difference.
case clang::Type::ObjCObjectPointer:
case clang::Type::BlockPointer:
case clang::Type::Pointer:
case clang::Type::LValueReference:
case clang::Type::RValueReference:
case clang::Type::MemberPointer:
return lldb::eEncodingUint;
case clang::Type::Complex: {
lldb::Encoding encoding = lldb::eEncodingIEEE754;
if (qual_type->isComplexType())
encoding = lldb::eEncodingIEEE754;
else {
const clang::ComplexType *complex_type =
qual_type->getAsComplexIntegerType();
if (complex_type)
encoding = GetType(complex_type->getElementType()).GetEncoding(count);
else
encoding = lldb::eEncodingSint;
}
count = 2;
return encoding;
}
case clang::Type::ObjCInterface:
break;
case clang::Type::Record:
break;
case clang::Type::Enum:
return lldb::eEncodingSint;
case clang::Type::DependentSizedArray:
case clang::Type::DependentSizedExtVector:
case clang::Type::UnresolvedUsing:
case clang::Type::Attributed:
case clang::Type::BTFTagAttributed:
case clang::Type::TemplateTypeParm:
case clang::Type::SubstTemplateTypeParm:
case clang::Type::SubstTemplateTypeParmPack:
case clang::Type::InjectedClassName:
case clang::Type::DependentName:
case clang::Type::DependentTemplateSpecialization:
case clang::Type::PackExpansion:
case clang::Type::ObjCObject:
case clang::Type::TemplateSpecialization:
case clang::Type::DeducedTemplateSpecialization:
case clang::Type::Adjusted:
case clang::Type::Pipe:
break;
// pointer type decayed from an array or function type.
case clang::Type::Decayed:
break;
case clang::Type::ObjCTypeParam:
break;
case clang::Type::DependentAddressSpace:
break;
case clang::Type::MacroQualified:
break;
case clang::Type::ConstantMatrix:
case clang::Type::DependentSizedMatrix:
break;
}
count = 0;
return lldb::eEncodingInvalid;
}
lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) {
if (!type)
return lldb::eFormatDefault;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Atomic:
case clang::Type::Auto:
case clang::Type::Decltype:
case clang::Type::Elaborated:
case clang::Type::Paren:
case clang::Type::Typedef:
case clang::Type::TypeOf:
case clang::Type::TypeOfExpr:
case clang::Type::Using:
llvm_unreachable("Handled in RemoveWrappingTypes!");
case clang::Type::UnaryTransform:
break;
case clang::Type::FunctionNoProto:
case clang::Type::FunctionProto:
break;
case clang::Type::IncompleteArray:
case clang::Type::VariableArray:
break;
case clang::Type::ConstantArray:
return lldb::eFormatVoid; // no value
case clang::Type::DependentVector:
case clang::Type::ExtVector:
case clang::Type::Vector:
break;
case clang::Type::BitInt:
case clang::Type::DependentBitInt:
return qual_type->isUnsignedIntegerType() ? lldb::eFormatUnsigned
: lldb::eFormatDecimal;
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::UnknownAny:
case clang::BuiltinType::Void:
case clang::BuiltinType::BoundMember:
break;
case clang::BuiltinType::Bool:
return lldb::eFormatBoolean;
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
return lldb::eFormatChar;
case clang::BuiltinType::Char8:
return lldb::eFormatUnicode8;
case clang::BuiltinType::Char16:
return lldb::eFormatUnicode16;
case clang::BuiltinType::Char32:
return lldb::eFormatUnicode32;
case clang::BuiltinType::UShort:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Short:
return lldb::eFormatDecimal;
case clang::BuiltinType::UInt:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Int:
return lldb::eFormatDecimal;
case clang::BuiltinType::ULong:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Long:
return lldb::eFormatDecimal;
case clang::BuiltinType::ULongLong:
return lldb::eFormatUnsigned;
case clang::BuiltinType::LongLong:
return lldb::eFormatDecimal;
case clang::BuiltinType::UInt128:
return lldb::eFormatUnsigned;
case clang::BuiltinType::Int128:
return lldb::eFormatDecimal;
case clang::BuiltinType::Half:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
return lldb::eFormatFloat;
default:
return lldb::eFormatHex;
}
break;
case clang::Type::ObjCObjectPointer:
return lldb::eFormatHex;
case clang::Type::BlockPointer:
return lldb::eFormatHex;
case clang::Type::Pointer:
return lldb::eFormatHex;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
return lldb::eFormatHex;
case clang::Type::MemberPointer:
break;
case clang::Type::Complex: {
if (qual_type->isComplexType())
return lldb::eFormatComplex;
else
return lldb::eFormatComplexInteger;
}
case clang::Type::ObjCInterface:
break;
case clang::Type::Record:
break;
case clang::Type::Enum:
return lldb::eFormatEnum;
case clang::Type::DependentSizedArray:
case clang::Type::DependentSizedExtVector:
case clang::Type::UnresolvedUsing:
case clang::Type::Attributed:
case clang::Type::BTFTagAttributed:
case clang::Type::TemplateTypeParm:
case clang::Type::SubstTemplateTypeParm:
case clang::Type::SubstTemplateTypeParmPack:
case clang::Type::InjectedClassName:
case clang::Type::DependentName:
case clang::Type::DependentTemplateSpecialization:
case clang::Type::PackExpansion:
case clang::Type::ObjCObject:
case clang::Type::TemplateSpecialization:
case clang::Type::DeducedTemplateSpecialization:
case clang::Type::Adjusted:
case clang::Type::Pipe:
break;
// pointer type decayed from an array or function type.
case clang::Type::Decayed:
break;
case clang::Type::ObjCTypeParam:
break;
case clang::Type::DependentAddressSpace:
break;
case clang::Type::MacroQualified:
break;
// Matrix types we're not sure how to display yet.
case clang::Type::ConstantMatrix:
case clang::Type::DependentSizedMatrix:
break;
}
// We don't know how to display this type...
return lldb::eFormatBytes;
}
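// Return true if the given interface (or, when check_superclass is set, any
// of its superclasses) declares at least one instance variable.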
static bool ObjCDeclHasIVars(clang::ObjCInterfaceDecl *class_interface_decl,
bool check_superclass) {
while (class_interface_decl) {
if (class_interface_decl->ivar_size() > 0)
return true;
if (check_superclass)
class_interface_decl = class_interface_decl->getSuperClass();
else
break;
}
return false;
}
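// For incomplete array types, ask the symbol file for dynamic array bounds
// using the user ID stashed in the type's metadata (if any).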
static Optional<SymbolFile::ArrayInfo>
GetDynamicArrayInfo(TypeSystemClang &ast, SymbolFile *sym_file,
clang::QualType qual_type,
const ExecutionContext *exe_ctx) {
if (qual_type->isIncompleteArrayType())
if (auto *metadata = ast.GetMetadata(qual_type.getTypePtr()))
return sym_file->GetDynamicArrayInfoForUID(metadata->GetUserID(),
exe_ctx);
return llvm::None;
}
uint32_t TypeSystemClang::GetNumChildren(lldb::opaque_compiler_type_t type,
bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) {
if (!type)
return 0;
uint32_t num_children = 0;
clang::QualType qual_type(RemoveWrappingTypes(GetQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::ObjCId: // child is Class
case clang::BuiltinType::ObjCClass: // child is Class
num_children = 1;
break;
default:
break;
}
break;
case clang::Type::Complex:
return 0;
case clang::Type::Record:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
if (omit_empty_base_classes) {
// Check each base class to see if it or any of its base classes
// contain any fields. This can help limit the noise in variable
// views by not having to show base classes that contain no members.
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->getAs<clang::RecordType>()
->getDecl());
// Skip empty base classes
if (!TypeSystemClang::RecordHasFields(base_class_decl))
continue;
num_children++;
}
} else {
// Include all base classes
num_children += cxx_record_decl->getNumBases();
}
}
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field)
++num_children;
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteQualType(&getASTContext(), qual_type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (omit_empty_base_classes) {
if (ObjCDeclHasIVars(superclass_interface_decl, true))
++num_children;
} else
++num_children;
}
num_children += class_interface_decl->ivar_size();
}
}
}
break;
case clang::Type::LValueReference:
case clang::Type::RValueReference:
case clang::Type::ObjCObjectPointer: {
CompilerType pointee_clang_type(GetPointeeType(type));
uint32_t num_pointee_children = 0;
if (pointee_clang_type.IsAggregateType())
num_pointee_children =
pointee_clang_type.GetNumChildren(omit_empty_base_classes, exe_ctx);
// If this type points to a simple type, then it has 1 child
if (num_pointee_children == 0)
num_children = 1;
else
num_children = num_pointee_children;
} break;
case clang::Type::Vector:
case clang::Type::ExtVector:
num_children =
llvm::cast<clang::VectorType>(qual_type.getTypePtr())->getNumElements();
break;
case clang::Type::ConstantArray:
num_children = llvm::cast<clang::ConstantArrayType>(qual_type.getTypePtr())
->getSize()
.getLimitedValue();
break;
case clang::Type::IncompleteArray:
if (auto array_info =
GetDynamicArrayInfo(*this, GetSymbolFile(), qual_type, exe_ctx))
// Only 1-dimensional arrays are supported.
num_children = array_info->element_orders.size()
? array_info->element_orders.back()
: 0;
break;
case clang::Type::Pointer: {
const clang::PointerType *pointer_type =
llvm::cast<clang::PointerType>(qual_type.getTypePtr());
clang::QualType pointee_type(pointer_type->getPointeeType());
CompilerType pointee_clang_type(GetType(pointee_type));
uint32_t num_pointee_children = 0;
if (pointee_clang_type.IsAggregateType())
num_pointee_children =
pointee_clang_type.GetNumChildren(omit_empty_base_classes, exe_ctx);
if (num_pointee_children == 0) {
// We have a pointer to a pointee type that claims it has no children. We
// will want to look at the pointee type itself to figure out how many
// children to report (see GetNumPointeeChildren).
num_children = GetNumPointeeChildren(pointee_type);
} else
num_children = num_pointee_children;
} break;
default:
break;
}
return num_children;
}
CompilerType TypeSystemClang::GetBuiltinTypeByName(ConstString name) {
return GetBasicType(GetBasicTypeEnumeration(name));
}
lldb::BasicType
TypeSystemClang::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) {
if (type) {
clang::QualType qual_type(GetQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
if (type_class == clang::Type::Builtin) {
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::Void:
return eBasicTypeVoid;
case clang::BuiltinType::Bool:
return eBasicTypeBool;
case clang::BuiltinType::Char_S:
return eBasicTypeSignedChar;
case clang::BuiltinType::Char_U:
return eBasicTypeUnsignedChar;
case clang::BuiltinType::Char8:
return eBasicTypeChar8;
case clang::BuiltinType::Char16:
return eBasicTypeChar16;
case clang::BuiltinType::Char32:
return eBasicTypeChar32;
case clang::BuiltinType::UChar:
return eBasicTypeUnsignedChar;
case clang::BuiltinType::SChar:
return eBasicTypeSignedChar;
case clang::BuiltinType::WChar_S:
return eBasicTypeSignedWChar;
case clang::BuiltinType::WChar_U:
return eBasicTypeUnsignedWChar;
case clang::BuiltinType::Short:
return eBasicTypeShort;
case clang::BuiltinType::UShort:
return eBasicTypeUnsignedShort;
case clang::BuiltinType::Int:
return eBasicTypeInt;
case clang::BuiltinType::UInt:
return eBasicTypeUnsignedInt;
case clang::BuiltinType::Long:
return eBasicTypeLong;
case clang::BuiltinType::ULong:
return eBasicTypeUnsignedLong;
case clang::BuiltinType::LongLong:
return eBasicTypeLongLong;
case clang::BuiltinType::ULongLong:
return eBasicTypeUnsignedLongLong;
case clang::BuiltinType::Int128:
return eBasicTypeInt128;
case clang::BuiltinType::UInt128:
return eBasicTypeUnsignedInt128;
case clang::BuiltinType::Half:
return eBasicTypeHalf;
case clang::BuiltinType::Float:
return eBasicTypeFloat;
case clang::BuiltinType::Double:
return eBasicTypeDouble;
case clang::BuiltinType::LongDouble:
return eBasicTypeLongDouble;
case clang::BuiltinType::NullPtr:
return eBasicTypeNullPtr;
case clang::BuiltinType::ObjCId:
return eBasicTypeObjCID;
case clang::BuiltinType::ObjCClass:
return eBasicTypeObjCClass;
case clang::BuiltinType::ObjCSel:
return eBasicTypeObjCSel;
default:
return eBasicTypeOther;
}
}
}
return eBasicTypeInvalid;
}
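// Example usage (a sketch; enum_type and target_name are hypothetical): visit
// an enum's enumerators and stop as soon as a match is found. Returning false
// from the callback terminates the iteration early.
//   ts.ForEachEnumerator(enum_type, [&](const CompilerType &int_type,
//                                       ConstString name,
//                                       const llvm::APSInt &value) {
//     return name != target_name;
//   });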
void TypeSystemClang::ForEachEnumerator(
lldb::opaque_compiler_type_t type,
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) {
const clang::EnumType *enum_type =
llvm::dyn_cast<clang::EnumType>(GetCanonicalQualType(type));
if (enum_type) {
const clang::EnumDecl *enum_decl = enum_type->getDecl();
if (enum_decl) {
CompilerType integer_type = GetType(enum_decl->getIntegerType());
clang::EnumDecl::enumerator_iterator enum_pos, enum_end_pos;
for (enum_pos = enum_decl->enumerator_begin(),
enum_end_pos = enum_decl->enumerator_end();
enum_pos != enum_end_pos; ++enum_pos) {
ConstString name(enum_pos->getNameAsString().c_str());
if (!callback(integer_type, name, enum_pos->getInitVal()))
break;
}
}
}
}
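// A minimal usage sketch (not part of the original source): counting an
// enum's enumerators via the ForEachEnumerator callback above. The helper
// name and its parameters are hypothetical; returning true from the callback
// continues the iteration, returning false stops it early.
static size_t ExampleCountEnumerators(TypeSystemClang &ts,
                                      lldb::opaque_compiler_type_t enum_type) {
  size_t count = 0;
  ts.ForEachEnumerator(enum_type,
                       [&count](const CompilerType & /*integer_type*/,
                                ConstString /*name*/,
                                const llvm::APSInt & /*value*/) -> bool {
                         ++count;
                         return true; // keep iterating over enumerators
                       });
  return count;
}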
#pragma mark Aggregate Types
uint32_t TypeSystemClang::GetNumFields(lldb::opaque_compiler_type_t type) {
if (!type)
return 0;
uint32_t count = 0;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::dyn_cast<clang::RecordType>(qual_type.getTypePtr());
if (record_type) {
clang::RecordDecl *record_decl = record_type->getDecl();
if (record_decl) {
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field)
++field_idx;
count = field_idx;
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
count = class_interface_decl->ivar_size();
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl)
count = class_interface_decl->ivar_size();
}
}
break;
default:
break;
}
return count;
}
static lldb::opaque_compiler_type_t
GetObjCFieldAtIndex(clang::ASTContext *ast,
clang::ObjCInterfaceDecl *class_interface_decl, size_t idx,
std::string &name, uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr, bool *is_bitfield_ptr) {
if (class_interface_decl) {
if (idx < (class_interface_decl->ivar_size())) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
uint32_t ivar_idx = 0;
for (ivar_pos = class_interface_decl->ivar_begin(); ivar_pos != ivar_end;
++ivar_pos, ++ivar_idx) {
if (ivar_idx == idx) {
const clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
clang::QualType ivar_qual_type(ivar_decl->getType());
name.assign(ivar_decl->getNameAsString());
if (bit_offset_ptr) {
const clang::ASTRecordLayout &interface_layout =
ast->getASTObjCInterfaceLayout(class_interface_decl);
*bit_offset_ptr = interface_layout.getFieldOffset(ivar_idx);
}
const bool is_bitfield = ivar_pos->isBitField();
if (bitfield_bit_size_ptr) {
*bitfield_bit_size_ptr = 0;
if (is_bitfield && ast) {
clang::Expr *bitfield_bit_size_expr = ivar_pos->getBitWidth();
clang::Expr::EvalResult result;
if (bitfield_bit_size_expr &&
bitfield_bit_size_expr->EvaluateAsInt(result, *ast)) {
llvm::APSInt bitfield_apsint = result.Val.getInt();
*bitfield_bit_size_ptr = bitfield_apsint.getLimitedValue();
}
}
}
if (is_bitfield_ptr)
*is_bitfield_ptr = is_bitfield;
return ivar_qual_type.getAsOpaquePtr();
}
}
}
}
return nullptr;
}
CompilerType TypeSystemClang::GetFieldAtIndex(lldb::opaque_compiler_type_t type,
size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) {
if (!type)
return CompilerType();
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++field_idx) {
if (idx == field_idx) {
// Copy the field name out for the caller.
name.assign(field->getNameAsString());
// If requested, report the field's bit offset from the record layout.
if (bit_offset_ptr) {
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(record_decl);
*bit_offset_ptr = record_layout.getFieldOffset(field_idx);
}
const bool is_bitfield = field->isBitField();
if (bitfield_bit_size_ptr) {
*bitfield_bit_size_ptr = 0;
if (is_bitfield) {
clang::Expr *bitfield_bit_size_expr = field->getBitWidth();
clang::Expr::EvalResult result;
if (bitfield_bit_size_expr &&
bitfield_bit_size_expr->EvaluateAsInt(result,
getASTContext())) {
llvm::APSInt bitfield_apsint = result.Val.getInt();
*bitfield_bit_size_ptr = bitfield_apsint.getLimitedValue();
}
}
}
if (is_bitfield_ptr)
*is_bitfield_ptr = is_bitfield;
return GetType(field->getType());
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
const clang::ObjCObjectPointerType *objc_class_type =
qual_type->castAs<clang::ObjCObjectPointerType>();
const clang::ObjCInterfaceType *objc_interface_type =
objc_class_type->getInterfaceType();
if (objc_interface_type &&
GetCompleteType(static_cast<lldb::opaque_compiler_type_t>(
const_cast<clang::ObjCInterfaceType *>(objc_interface_type)))) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getDecl();
if (class_interface_decl) {
return CompilerType(
this, GetObjCFieldAtIndex(&getASTContext(), class_interface_decl,
idx, name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr));
}
}
break;
}
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
return CompilerType(
this, GetObjCFieldAtIndex(&getASTContext(), class_interface_decl,
idx, name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr));
}
}
break;
default:
break;
}
return CompilerType();
}
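// A minimal usage sketch (not part of the original source): enumerating the
// fields of a record with GetNumFields/GetFieldAtIndex. The helper name is
// hypothetical; all pointer out-parameters are optional and may be nullptr.
static void ExampleVisitFields(TypeSystemClang &ts,
                               lldb::opaque_compiler_type_t record_type) {
  const uint32_t num_fields = ts.GetNumFields(record_type);
  for (uint32_t i = 0; i < num_fields; ++i) {
    std::string field_name;
    uint64_t bit_offset = 0;
    uint32_t bitfield_bit_size = 0;
    bool is_bitfield = false;
    CompilerType field_type = ts.GetFieldAtIndex(
        record_type, i, field_name, &bit_offset, &bitfield_bit_size,
        &is_bitfield);
    if (!field_type.IsValid())
      continue;
    // field_name, bit_offset and the bitfield information now describe
    // field 'i'; a real caller would record or display them here.
    (void)is_bitfield;
  }
}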
uint32_t
TypeSystemClang::GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) {
uint32_t count = 0;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl)
count = cxx_record_decl->getNumBases();
}
break;
case clang::Type::ObjCObjectPointer:
count = GetPointeeType(type).GetNumDirectBaseClasses();
break;
case clang::Type::ObjCObject:
if (GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
qual_type->getAsObjCQualifiedInterfaceType();
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl && class_interface_decl->getSuperClass())
count = 1;
}
}
break;
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
const clang::ObjCInterfaceType *objc_interface_type =
qual_type->getAs<clang::ObjCInterfaceType>();
if (objc_interface_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getInterface();
if (class_interface_decl && class_interface_decl->getSuperClass())
count = 1;
}
}
break;
default:
break;
}
return count;
}
uint32_t
TypeSystemClang::GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) {
uint32_t count = 0;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl)
count = cxx_record_decl->getNumVBases();
}
break;
default:
break;
}
return count;
}
CompilerType TypeSystemClang::GetDirectBaseClassAtIndex(
lldb::opaque_compiler_type_t type, size_t idx, uint32_t *bit_offset_ptr) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
uint32_t curr_idx = 0;
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class, ++curr_idx) {
if (curr_idx == idx) {
if (bit_offset_ptr) {
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(cxx_record_decl);
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->castAs<clang::RecordType>()
->getDecl());
if (base_class->isVirtual())
*bit_offset_ptr =
record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
else
*bit_offset_ptr =
record_layout.getBaseClassOffset(base_class_decl)
.getQuantity() *
8;
}
return GetType(base_class->getType());
}
}
}
}
break;
case clang::Type::ObjCObjectPointer:
return GetPointeeType(type).GetDirectBaseClassAtIndex(idx, bit_offset_ptr);
case clang::Type::ObjCObject:
if (idx == 0 && GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
qual_type->getAsObjCQualifiedInterfaceType();
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (bit_offset_ptr)
*bit_offset_ptr = 0;
return GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
}
}
}
}
break;
case clang::Type::ObjCInterface:
if (idx == 0 && GetCompleteType(type)) {
const clang::ObjCObjectType *objc_interface_type =
qual_type->getAs<clang::ObjCInterfaceType>();
if (objc_interface_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_interface_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (bit_offset_ptr)
*bit_offset_ptr = 0;
return GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
}
}
}
}
break;
default:
break;
}
return CompilerType();
}
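// A minimal usage sketch (not part of the original source): walking the
// direct base classes of a record and their offsets. The helper name is
// hypothetical; bit_offset_ptr may be nullptr when the offset is not needed.
static void ExampleVisitDirectBases(TypeSystemClang &ts,
                                    lldb::opaque_compiler_type_t record_type) {
  const uint32_t num_bases = ts.GetNumDirectBaseClasses(record_type);
  for (uint32_t i = 0; i < num_bases; ++i) {
    uint32_t bit_offset = 0;
    CompilerType base_type =
        ts.GetDirectBaseClassAtIndex(record_type, i, &bit_offset);
    if (base_type.IsValid()) {
      // The base subobject starts bit_offset / 8 bytes into the derived
      // object (virtual bases report their offset from the record layout).
      (void)bit_offset;
    }
  }
}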
CompilerType TypeSystemClang::GetVirtualBaseClassAtIndex(
lldb::opaque_compiler_type_t type, size_t idx, uint32_t *bit_offset_ptr) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
uint32_t curr_idx = 0;
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->vbases_begin(),
base_class_end = cxx_record_decl->vbases_end();
base_class != base_class_end; ++base_class, ++curr_idx) {
if (curr_idx == idx) {
if (bit_offset_ptr) {
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(cxx_record_decl);
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->castAs<clang::RecordType>()
->getDecl());
*bit_offset_ptr =
record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
}
return GetType(base_class->getType());
}
}
}
}
break;
default:
break;
}
return CompilerType();
}
// If a pointer to a pointee type (the clang_type arg) says that it has no
// children, then we either need to trust it, or override it and return a
// different result. For example, an "int *" has one child that is an integer,
// but a function pointer doesn't have any children. Likewise if a Record type
// claims it has no children, then there really is nothing to show.
uint32_t TypeSystemClang::GetNumPointeeChildren(clang::QualType type) {
if (type.isNull())
return 0;
clang::QualType qual_type = RemoveWrappingTypes(type.getCanonicalType());
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Builtin:
switch (llvm::cast<clang::BuiltinType>(qual_type)->getKind()) {
case clang::BuiltinType::UnknownAny:
case clang::BuiltinType::Void:
case clang::BuiltinType::NullPtr:
case clang::BuiltinType::OCLEvent:
case clang::BuiltinType::OCLImage1dRO:
case clang::BuiltinType::OCLImage1dWO:
case clang::BuiltinType::OCLImage1dRW:
case clang::BuiltinType::OCLImage1dArrayRO:
case clang::BuiltinType::OCLImage1dArrayWO:
case clang::BuiltinType::OCLImage1dArrayRW:
case clang::BuiltinType::OCLImage1dBufferRO:
case clang::BuiltinType::OCLImage1dBufferWO:
case clang::BuiltinType::OCLImage1dBufferRW:
case clang::BuiltinType::OCLImage2dRO:
case clang::BuiltinType::OCLImage2dWO:
case clang::BuiltinType::OCLImage2dRW:
case clang::BuiltinType::OCLImage2dArrayRO:
case clang::BuiltinType::OCLImage2dArrayWO:
case clang::BuiltinType::OCLImage2dArrayRW:
case clang::BuiltinType::OCLImage3dRO:
case clang::BuiltinType::OCLImage3dWO:
case clang::BuiltinType::OCLImage3dRW:
case clang::BuiltinType::OCLSampler:
return 0;
case clang::BuiltinType::Bool:
case clang::BuiltinType::Char_U:
case clang::BuiltinType::UChar:
case clang::BuiltinType::WChar_U:
case clang::BuiltinType::Char16:
case clang::BuiltinType::Char32:
case clang::BuiltinType::UShort:
case clang::BuiltinType::UInt:
case clang::BuiltinType::ULong:
case clang::BuiltinType::ULongLong:
case clang::BuiltinType::UInt128:
case clang::BuiltinType::Char_S:
case clang::BuiltinType::SChar:
case clang::BuiltinType::WChar_S:
case clang::BuiltinType::Short:
case clang::BuiltinType::Int:
case clang::BuiltinType::Long:
case clang::BuiltinType::LongLong:
case clang::BuiltinType::Int128:
case clang::BuiltinType::Float:
case clang::BuiltinType::Double:
case clang::BuiltinType::LongDouble:
case clang::BuiltinType::Dependent:
case clang::BuiltinType::Overload:
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
case clang::BuiltinType::ObjCSel:
case clang::BuiltinType::BoundMember:
case clang::BuiltinType::Half:
case clang::BuiltinType::ARCUnbridgedCast:
case clang::BuiltinType::PseudoObject:
case clang::BuiltinType::BuiltinFn:
case clang::BuiltinType::OMPArraySection:
return 1;
default:
return 0;
}
break;
case clang::Type::Complex:
return 1;
case clang::Type::Pointer:
return 1;
case clang::Type::BlockPointer:
return 0; // Block pointers without debug info have no children.
case clang::Type::LValueReference:
return 1;
case clang::Type::RValueReference:
return 1;
case clang::Type::MemberPointer:
return 0;
case clang::Type::ConstantArray:
return 0;
case clang::Type::IncompleteArray:
return 0;
case clang::Type::VariableArray:
return 0;
case clang::Type::DependentSizedArray:
return 0;
case clang::Type::DependentSizedExtVector:
return 0;
case clang::Type::Vector:
return 0;
case clang::Type::ExtVector:
return 0;
case clang::Type::FunctionProto:
return 0; // Function pointers have no children.
case clang::Type::FunctionNoProto:
return 0; // Function pointers have no children.
case clang::Type::UnresolvedUsing:
return 0;
case clang::Type::Record:
return 0;
case clang::Type::Enum:
return 1;
case clang::Type::TemplateTypeParm:
return 1;
case clang::Type::SubstTemplateTypeParm:
return 1;
case clang::Type::TemplateSpecialization:
return 1;
case clang::Type::InjectedClassName:
return 0;
case clang::Type::DependentName:
return 1;
case clang::Type::DependentTemplateSpecialization:
return 1;
case clang::Type::ObjCObject:
return 0;
case clang::Type::ObjCInterface:
return 0;
case clang::Type::ObjCObjectPointer:
return 1;
default:
break;
}
return 0;
}
CompilerType TypeSystemClang::GetChildCompilerTypeAtIndex(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx,
bool transparent_pointers, bool omit_empty_base_classes,
bool ignore_array_bounds, std::string &child_name,
uint32_t &child_byte_size, int32_t &child_byte_offset,
uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset,
bool &child_is_base_class, bool &child_is_deref_of_parent,
ValueObject *valobj, uint64_t &language_flags) {
if (!type)
return CompilerType();
auto get_exe_scope = [&exe_ctx]() {
return exe_ctx ? exe_ctx->GetBestExecutionContextScope() : nullptr;
};
clang::QualType parent_qual_type(
RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass parent_type_class =
parent_qual_type->getTypeClass();
child_bitfield_bit_size = 0;
child_bitfield_bit_offset = 0;
child_is_base_class = false;
language_flags = 0;
const bool idx_is_valid =
idx < GetNumChildren(type, omit_empty_base_classes, exe_ctx);
int32_t bit_offset;
switch (parent_type_class) {
case clang::Type::Builtin:
if (idx_is_valid) {
switch (llvm::cast<clang::BuiltinType>(parent_qual_type)->getKind()) {
case clang::BuiltinType::ObjCId:
case clang::BuiltinType::ObjCClass:
child_name = "isa";
child_byte_size =
getASTContext().getTypeSize(getASTContext().ObjCBuiltinClassTy) /
CHAR_BIT;
return GetType(getASTContext().ObjCBuiltinClassTy);
default:
break;
}
}
break;
case clang::Type::Record:
if (idx_is_valid && GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(parent_qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
// We might have base classes to report first
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const clang::CXXRecordDecl *base_class_decl = nullptr;
// Skip empty base classes
if (omit_empty_base_classes) {
base_class_decl = llvm::cast<clang::CXXRecordDecl>(
base_class->getType()->getAs<clang::RecordType>()->getDecl());
if (!TypeSystemClang::RecordHasFields(base_class_decl))
continue;
}
if (idx == child_idx) {
if (base_class_decl == nullptr)
base_class_decl = llvm::cast<clang::CXXRecordDecl>(
base_class->getType()->getAs<clang::RecordType>()->getDecl());
if (base_class->isVirtual()) {
bool handled = false;
if (valobj) {
clang::VTableContextBase *vtable_ctx =
getASTContext().getVTableContext();
if (vtable_ctx)
handled = GetVBaseBitOffset(*vtable_ctx, *valobj,
record_layout, cxx_record_decl,
base_class_decl, bit_offset);
}
if (!handled)
bit_offset = record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
} else
bit_offset = record_layout.getBaseClassOffset(base_class_decl)
.getQuantity() *
8;
// Base classes should be a multiple of 8 bits in size
child_byte_offset = bit_offset / 8;
CompilerType base_class_clang_type = GetType(base_class->getType());
child_name = base_class_clang_type.GetTypeName().AsCString("");
Optional<uint64_t> size =
base_class_clang_type.GetBitSize(get_exe_scope());
if (!size)
return {};
uint64_t base_class_clang_type_bit_size = *size;
// Base classes bit sizes should be a multiple of 8 bits in size
assert(base_class_clang_type_bit_size % 8 == 0);
child_byte_size = base_class_clang_type_bit_size / 8;
child_is_base_class = true;
return base_class_clang_type;
}
// We don't increment the child index in the for loop since we might
// be skipping empty base classes
++child_idx;
}
}
// Make sure index is in range...
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++field_idx, ++child_idx) {
if (idx == child_idx) {
// Copy the field name out for the caller.
child_name.assign(field->getNameAsString());
// Compute the field's byte size from its type.
CompilerType field_clang_type = GetType(field->getType());
assert(field_idx < record_layout.getFieldCount());
Optional<uint64_t> size =
field_clang_type.GetByteSize(get_exe_scope());
if (!size)
return {};
child_byte_size = *size;
const uint32_t child_bit_size = child_byte_size * 8;
// Figure out the field offset within the current struct/union/class
// type
bit_offset = record_layout.getFieldOffset(field_idx);
if (FieldIsBitfield(*field, child_bitfield_bit_size)) {
child_bitfield_bit_offset = bit_offset % child_bit_size;
const uint32_t child_bit_offset =
bit_offset - child_bitfield_bit_offset;
child_byte_offset = child_bit_offset / 8;
} else {
child_byte_offset = bit_offset / 8;
}
return field_clang_type;
}
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (idx_is_valid && GetCompleteType(type)) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(parent_qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
uint32_t child_idx = 0;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
const clang::ASTRecordLayout &interface_layout =
getASTContext().getASTObjCInterfaceLayout(class_interface_decl);
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
if (superclass_interface_decl) {
if (omit_empty_base_classes) {
CompilerType base_class_clang_type =
GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
if (base_class_clang_type.GetNumChildren(omit_empty_base_classes,
exe_ctx) > 0) {
if (idx == 0) {
clang::QualType ivar_qual_type(
getASTContext().getObjCInterfaceType(
superclass_interface_decl));
child_name.assign(
superclass_interface_decl->getNameAsString());
clang::TypeInfo ivar_type_info =
getASTContext().getTypeInfo(ivar_qual_type.getTypePtr());
child_byte_size = ivar_type_info.Width / 8;
child_byte_offset = 0;
child_is_base_class = true;
return GetType(ivar_qual_type);
}
++child_idx;
}
} else
++child_idx;
}
const uint32_t superclass_idx = child_idx;
if (idx < (child_idx + class_interface_decl->ivar_size())) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
for (ivar_pos = class_interface_decl->ivar_begin();
ivar_pos != ivar_end; ++ivar_pos) {
if (child_idx == idx) {
clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
clang::QualType ivar_qual_type(ivar_decl->getType());
child_name.assign(ivar_decl->getNameAsString());
clang::TypeInfo ivar_type_info =
getASTContext().getTypeInfo(ivar_qual_type.getTypePtr());
child_byte_size = ivar_type_info.Width / 8;
// Figure out the field offset within the current
// struct/union/class type For ObjC objects, we can't trust the
// bit offset we get from the Clang AST, since that doesn't
// account for the space taken up by unbacked properties, or
// from the changing size of base classes that are newer than
// this class. So if we have a process around that we can ask
// about this object, do so.
child_byte_offset = LLDB_INVALID_IVAR_OFFSET;
Process *process = nullptr;
if (exe_ctx)
process = exe_ctx->GetProcessPtr();
if (process) {
ObjCLanguageRuntime *objc_runtime =
ObjCLanguageRuntime::Get(*process);
if (objc_runtime != nullptr) {
CompilerType parent_ast_type = GetType(parent_qual_type);
child_byte_offset = objc_runtime->GetByteOffsetForIvar(
parent_ast_type, ivar_decl->getNameAsString().c_str());
}
}
// Setting this to INT32_MAX to make sure we don't compute it
// twice...
bit_offset = INT32_MAX;
if (child_byte_offset ==
static_cast<int32_t>(LLDB_INVALID_IVAR_OFFSET)) {
bit_offset = interface_layout.getFieldOffset(child_idx -
superclass_idx);
child_byte_offset = bit_offset / 8;
}
// Note, the ObjC Ivar Byte offset is just that, it doesn't
// account for the bit offset of a bitfield within its
// containing object. So regardless of where we get the byte
// offset from, we still need to get the bit offset for
// bitfields from the layout.
if (FieldIsBitfield(ivar_decl, child_bitfield_bit_size)) {
if (bit_offset == INT32_MAX)
bit_offset = interface_layout.getFieldOffset(
child_idx - superclass_idx);
child_bitfield_bit_offset = bit_offset % 8;
}
return GetType(ivar_qual_type);
}
++child_idx;
}
}
}
}
}
break;
case clang::Type::ObjCObjectPointer:
if (idx_is_valid) {
CompilerType pointee_clang_type(GetPointeeType(type));
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
child_is_deref_of_parent = false;
bool tmp_child_is_deref_of_parent = false;
return pointee_clang_type.GetChildCompilerTypeAtIndex(
exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset,
child_is_base_class, tmp_child_is_deref_of_parent, valobj,
language_flags);
} else {
child_is_deref_of_parent = true;
const char *parent_name =
valobj ? valobj->GetName().GetCString() : nullptr;
if (parent_name) {
child_name.assign(1, '*');
child_name += parent_name;
}
// We have a pointer to a simple type
if (idx == 0 && pointee_clang_type.GetCompleteType()) {
if (Optional<uint64_t> size =
pointee_clang_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = 0;
return pointee_clang_type;
}
}
}
}
break;
case clang::Type::Vector:
case clang::Type::ExtVector:
if (idx_is_valid) {
const clang::VectorType *array =
llvm::cast<clang::VectorType>(parent_qual_type.getTypePtr());
if (array) {
CompilerType element_type = GetType(array->getElementType());
if (element_type.GetCompleteType()) {
char element_name[64];
::snprintf(element_name, sizeof(element_name), "[%" PRIu64 "]",
static_cast<uint64_t>(idx));
child_name.assign(element_name);
if (Optional<uint64_t> size =
element_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = (int32_t)idx * (int32_t)child_byte_size;
return element_type;
}
}
}
}
break;
case clang::Type::ConstantArray:
case clang::Type::IncompleteArray:
if (ignore_array_bounds || idx_is_valid) {
const clang::ArrayType *array = GetQualType(type)->getAsArrayTypeUnsafe();
if (array) {
CompilerType element_type = GetType(array->getElementType());
if (element_type.GetCompleteType()) {
child_name = std::string(llvm::formatv("[{0}]", idx));
if (Optional<uint64_t> size =
element_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = (int32_t)idx * (int32_t)child_byte_size;
return element_type;
}
}
}
}
break;
case clang::Type::Pointer: {
CompilerType pointee_clang_type(GetPointeeType(type));
// Don't dereference "void *" pointers
if (pointee_clang_type.IsVoidType())
return CompilerType();
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
child_is_deref_of_parent = false;
bool tmp_child_is_deref_of_parent = false;
return pointee_clang_type.GetChildCompilerTypeAtIndex(
exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset,
child_is_base_class, tmp_child_is_deref_of_parent, valobj,
language_flags);
} else {
child_is_deref_of_parent = true;
const char *parent_name =
valobj ? valobj->GetName().GetCString() : nullptr;
if (parent_name) {
child_name.assign(1, '*');
child_name += parent_name;
}
// We have a pointer to a simple type
if (idx == 0) {
if (Optional<uint64_t> size =
pointee_clang_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = 0;
return pointee_clang_type;
}
}
}
break;
}
case clang::Type::LValueReference:
case clang::Type::RValueReference:
if (idx_is_valid) {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(
RemoveWrappingTypes(GetQualType(type)).getTypePtr());
CompilerType pointee_clang_type =
GetType(reference_type->getPointeeType());
if (transparent_pointers && pointee_clang_type.IsAggregateType()) {
child_is_deref_of_parent = false;
bool tmp_child_is_deref_of_parent = false;
return pointee_clang_type.GetChildCompilerTypeAtIndex(
exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset,
child_is_base_class, tmp_child_is_deref_of_parent, valobj,
language_flags);
} else {
const char *parent_name =
valobj ? valobj->GetName().GetCString() : nullptr;
if (parent_name) {
child_name.assign(1, '&');
child_name += parent_name;
}
// We have a reference to a simple type
if (idx == 0) {
if (Optional<uint64_t> size =
pointee_clang_type.GetByteSize(get_exe_scope())) {
child_byte_size = *size;
child_byte_offset = 0;
return pointee_clang_type;
}
}
}
}
break;
default:
break;
}
return CompilerType();
}
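// A minimal usage sketch (not part of the original source): fetching a single
// child of a type with GetChildCompilerTypeAtIndex. The helper name is
// hypothetical; every reference out-parameter must be supplied, while
// 'exe_ctx' and 'valobj' may be null when no process or value is available.
static CompilerType ExampleGetChild(TypeSystemClang &ts,
                                    lldb::opaque_compiler_type_t type,
                                    size_t idx) {
  std::string child_name;
  uint32_t child_byte_size = 0;
  int32_t child_byte_offset = 0;
  uint32_t child_bitfield_bit_size = 0;
  uint32_t child_bitfield_bit_offset = 0;
  bool child_is_base_class = false;
  bool child_is_deref_of_parent = false;
  uint64_t language_flags = 0;
  return ts.GetChildCompilerTypeAtIndex(
      type, /*exe_ctx=*/nullptr, idx,
      /*transparent_pointers=*/true, /*omit_empty_base_classes=*/true,
      /*ignore_array_bounds=*/false, child_name, child_byte_size,
      child_byte_offset, child_bitfield_bit_size, child_bitfield_bit_offset,
      child_is_base_class, child_is_deref_of_parent, /*valobj=*/nullptr,
      language_flags);
}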
static uint32_t GetIndexForRecordBase(const clang::RecordDecl *record_decl,
const clang::CXXBaseSpecifier *base_spec,
bool omit_empty_base_classes) {
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
clang::CXXRecordDecl::base_class_const_iterator base_class, base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
if (omit_empty_base_classes) {
if (BaseSpecifierIsEmpty(base_class))
continue;
}
if (base_class == base_spec)
return child_idx;
++child_idx;
}
}
return UINT32_MAX;
}
static uint32_t GetIndexForRecordChild(const clang::RecordDecl *record_decl,
clang::NamedDecl *canonical_decl,
bool omit_empty_base_classes) {
uint32_t child_idx = TypeSystemClang::GetNumBaseClasses(
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl),
omit_empty_base_classes);
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(), field_end = record_decl->field_end();
field != field_end; ++field, ++child_idx) {
if (field->getCanonicalDecl() == canonical_decl)
return child_idx;
}
return UINT32_MAX;
}
// Look for a child member (doesn't include base classes, but it does include
// their members) in the type hierarchy. Returns an index path into
// "clang_type" on how to reach the appropriate member.
//
// class A
// {
// public:
// int m_a;
// int m_b;
// };
//
// class B
// {
// };
//
// class C :
// public B,
// public A
// {
// };
//
// If we have a clang type that describes "class C", and we wanted to look up
// "m_b" in it:
//
// With omit_empty_base_classes == false we would get an integer array back
// with: { 1, 1 }. The first index 1 is the child index for "class A" within
// class C; the second index 1 is the child index for "m_b" within class A.
//
// With omit_empty_base_classes == true we would get an integer array back
// with: { 0, 1 }. The first index 0 is the child index for "class A" within
// class C (since class B doesn't have any members it doesn't count); the
// second index 1 is the child index for "m_b" within class A.
size_t TypeSystemClang::GetIndexOfChildMemberWithName(
lldb::opaque_compiler_type_t type, const char *name,
bool omit_empty_base_classes, std::vector<uint32_t> &child_indexes) {
if (type && name && name[0]) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
// Try to find a field whose name matches "name".
clang::RecordDecl::field_iterator field, field_end;
llvm::StringRef name_sref(name);
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++child_idx) {
llvm::StringRef field_name = field->getName();
if (field_name.empty()) {
CompilerType field_type = GetType(field->getType());
child_indexes.push_back(child_idx);
if (field_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes))
return child_indexes.size();
child_indexes.pop_back();
} else if (field_name.equals(name_sref)) {
// We have to add on the number of base classes to this index!
child_indexes.push_back(
child_idx + TypeSystemClang::GetNumBaseClasses(
cxx_record_decl, omit_empty_base_classes));
return child_indexes.size();
}
}
if (cxx_record_decl) {
const clang::RecordDecl *parent_record_decl = cxx_record_decl;
// We didn't find the member directly; let clang's lookup machinery do the work.
clang::IdentifierInfo &ident_ref =
getASTContext().Idents.get(name_sref);
clang::DeclarationName decl_name(&ident_ref);
clang::CXXBasePaths paths;
if (cxx_record_decl->lookupInBases(
[decl_name](const clang::CXXBaseSpecifier *specifier,
clang::CXXBasePath &path) {
CXXRecordDecl *record =
specifier->getType()->getAsCXXRecordDecl();
auto r = record->lookup(decl_name);
path.Decls = r.begin();
return !r.empty();
},
paths)) {
clang::CXXBasePaths::const_paths_iterator path,
path_end = paths.end();
for (path = paths.begin(); path != path_end; ++path) {
const size_t num_path_elements = path->size();
for (size_t e = 0; e < num_path_elements; ++e) {
clang::CXXBasePathElement elem = (*path)[e];
child_idx = GetIndexForRecordBase(parent_record_decl, elem.Base,
omit_empty_base_classes);
if (child_idx == UINT32_MAX) {
child_indexes.clear();
return 0;
} else {
child_indexes.push_back(child_idx);
parent_record_decl = llvm::cast<clang::RecordDecl>(
elem.Base->getType()
->castAs<clang::RecordType>()
->getDecl());
}
}
for (clang::DeclContext::lookup_iterator I = path->Decls, E;
I != E; ++I) {
child_idx = GetIndexForRecordChild(
parent_record_decl, *I, omit_empty_base_classes);
if (child_idx == UINT32_MAX) {
child_indexes.clear();
return 0;
} else {
child_indexes.push_back(child_idx);
}
}
}
return child_indexes.size();
}
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
llvm::StringRef name_sref(name);
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
uint32_t child_idx = 0;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
for (ivar_pos = class_interface_decl->ivar_begin();
ivar_pos != ivar_end; ++ivar_pos, ++child_idx) {
const clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
if (ivar_decl->getName().equals(name_sref)) {
if ((!omit_empty_base_classes && superclass_interface_decl) ||
(omit_empty_base_classes &&
ObjCDeclHasIVars(superclass_interface_decl, true)))
++child_idx;
child_indexes.push_back(child_idx);
return child_indexes.size();
}
}
if (superclass_interface_decl) {
// The super class index is always zero for ObjC classes, so we
// push it onto the child indexes in case we find an ivar in our
// superclass...
child_indexes.push_back(0);
CompilerType superclass_clang_type =
GetType(getASTContext().getObjCInterfaceType(
superclass_interface_decl));
if (superclass_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes)) {
// We did find an ivar in a superclass so just return the
// results!
return child_indexes.size();
}
// We didn't find an ivar matching "name" in our superclass, pop
// the superclass zero index that we pushed on above.
child_indexes.pop_back();
}
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
CompilerType objc_object_clang_type = GetType(
llvm::cast<clang::ObjCObjectPointerType>(qual_type.getTypePtr())
->getPointeeType());
return objc_object_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes);
} break;
case clang::Type::ConstantArray: {
// const clang::ConstantArrayType *array =
// llvm::cast<clang::ConstantArrayType>(parent_qual_type.getTypePtr());
// const uint64_t element_count =
// array->getSize().getLimitedValue();
//
// if (idx < element_count)
// {
// std::pair<uint64_t, unsigned> field_type_info =
// ast->getTypeInfo(array->getElementType());
//
// char element_name[32];
// ::snprintf (element_name, sizeof (element_name),
// "%s[%u]", parent_name ? parent_name : "", idx);
//
// child_name.assign(element_name);
// assert(field_type_info.first % 8 == 0);
// child_byte_size = field_type_info.first / 8;
// child_byte_offset = idx * child_byte_size;
// return array->getElementType().getAsOpaquePtr();
// }
} break;
// case clang::Type::MemberPointerType:
// {
// MemberPointerType *mem_ptr_type =
// llvm::cast<MemberPointerType>(qual_type.getTypePtr());
// clang::QualType pointee_type =
// mem_ptr_type->getPointeeType();
//
// if (TypeSystemClang::IsAggregateType
// (pointee_type.getAsOpaquePtr()))
// {
// return GetIndexOfChildWithName (ast,
// mem_ptr_type->getPointeeType().getAsOpaquePtr(),
// name);
// }
// }
// break;
//
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
clang::QualType pointee_type(reference_type->getPointeeType());
CompilerType pointee_clang_type = GetType(pointee_type);
if (pointee_clang_type.IsAggregateType()) {
return pointee_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes);
}
} break;
case clang::Type::Pointer: {
CompilerType pointee_clang_type(GetPointeeType(type));
if (pointee_clang_type.IsAggregateType()) {
return pointee_clang_type.GetIndexOfChildMemberWithName(
name, omit_empty_base_classes, child_indexes);
}
} break;
default:
break;
}
}
return 0;
}
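// A minimal usage sketch (not part of the original source): resolving a
// member name to an index path with GetIndexOfChildMemberWithName. For the
// "class C : public B, public A" example described before this function,
// looking up "m_b" yields {1, 1}, or {0, 1} when empty base classes are
// omitted. The helper name is hypothetical; a return value of 0 means the
// member was not found.
static bool ExampleFindMemberPath(TypeSystemClang &ts,
                                  lldb::opaque_compiler_type_t type,
                                  const char *member_name,
                                  std::vector<uint32_t> &path) {
  path.clear();
  return ts.GetIndexOfChildMemberWithName(type, member_name,
                                          /*omit_empty_base_classes=*/false,
                                          path) != 0;
}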
// Get the index of the child of "clang_type" whose name matches. This
// function doesn't descend into the children; it only looks one level deep,
// and name matches can include base class names.
uint32_t
TypeSystemClang::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
const char *name,
bool omit_empty_base_classes) {
if (type && name && name[0]) {
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
// Skip empty base classes
clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()
->castAs<clang::RecordType>()
->getDecl());
if (omit_empty_base_classes &&
!TypeSystemClang::RecordHasFields(base_class_decl))
continue;
CompilerType base_class_clang_type = GetType(base_class->getType());
std::string base_class_type_name(
base_class_clang_type.GetTypeName().AsCString(""));
if (base_class_type_name == name)
return child_idx;
++child_idx;
}
}
// Try to find a field whose name matches "name".
clang::RecordDecl::field_iterator field, field_end;
llvm::StringRef name_sref(name);
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++child_idx) {
if (field->getName().equals(name_sref))
return child_idx;
}
}
break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface:
if (GetCompleteType(type)) {
llvm::StringRef name_sref(name);
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
uint32_t child_idx = 0;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
clang::ObjCInterfaceDecl::ivar_iterator ivar_pos,
ivar_end = class_interface_decl->ivar_end();
clang::ObjCInterfaceDecl *superclass_interface_decl =
class_interface_decl->getSuperClass();
for (ivar_pos = class_interface_decl->ivar_begin();
ivar_pos != ivar_end; ++ivar_pos, ++child_idx) {
const clang::ObjCIvarDecl *ivar_decl = *ivar_pos;
if (ivar_decl->getName().equals(name_sref)) {
if ((!omit_empty_base_classes && superclass_interface_decl) ||
(omit_empty_base_classes &&
ObjCDeclHasIVars(superclass_interface_decl, true)))
++child_idx;
return child_idx;
}
}
if (superclass_interface_decl) {
if (superclass_interface_decl->getName().equals(name_sref))
return 0;
}
}
}
}
break;
case clang::Type::ObjCObjectPointer: {
CompilerType pointee_clang_type = GetType(
llvm::cast<clang::ObjCObjectPointerType>(qual_type.getTypePtr())
->getPointeeType());
return pointee_clang_type.GetIndexOfChildWithName(
name, omit_empty_base_classes);
} break;
case clang::Type::ConstantArray: {
// const clang::ConstantArrayType *array =
// llvm::cast<clang::ConstantArrayType>(parent_qual_type.getTypePtr());
// const uint64_t element_count =
// array->getSize().getLimitedValue();
//
// if (idx < element_count)
// {
// std::pair<uint64_t, unsigned> field_type_info =
// ast->getTypeInfo(array->getElementType());
//
// char element_name[32];
// ::snprintf (element_name, sizeof (element_name),
// "%s[%u]", parent_name ? parent_name : "", idx);
//
// child_name.assign(element_name);
// assert(field_type_info.first % 8 == 0);
// child_byte_size = field_type_info.first / 8;
// child_byte_offset = idx * child_byte_size;
// return array->getElementType().getAsOpaquePtr();
// }
} break;
// case clang::Type::MemberPointerType:
// {
// MemberPointerType *mem_ptr_type =
// llvm::cast<MemberPointerType>(qual_type.getTypePtr());
// clang::QualType pointee_type =
// mem_ptr_type->getPointeeType();
//
// if (TypeSystemClang::IsAggregateType
// (pointee_type.getAsOpaquePtr()))
// {
// return GetIndexOfChildWithName (ast,
// mem_ptr_type->getPointeeType().getAsOpaquePtr(),
// name);
// }
// }
// break;
//
case clang::Type::LValueReference:
case clang::Type::RValueReference: {
const clang::ReferenceType *reference_type =
llvm::cast<clang::ReferenceType>(qual_type.getTypePtr());
CompilerType pointee_type = GetType(reference_type->getPointeeType());
if (pointee_type.IsAggregateType()) {
return pointee_type.GetIndexOfChildWithName(name,
omit_empty_base_classes);
}
} break;
case clang::Type::Pointer: {
const clang::PointerType *pointer_type =
llvm::cast<clang::PointerType>(qual_type.getTypePtr());
CompilerType pointee_type = GetType(pointer_type->getPointeeType());
if (pointee_type.IsAggregateType()) {
return pointee_type.GetIndexOfChildWithName(name,
omit_empty_base_classes);
} else {
// if (parent_name)
// {
// child_name.assign(1, '*');
// child_name += parent_name;
// }
//
// // We have a pointer to an simple type
// if (idx == 0)
// {
// std::pair<uint64_t, unsigned> clang_type_info
// = ast->getTypeInfo(pointee_type);
// assert(clang_type_info.first % 8 == 0);
// child_byte_size = clang_type_info.first / 8;
// child_byte_offset = 0;
// return pointee_type.getAsOpaquePtr();
// }
}
} break;
default:
break;
}
}
return UINT32_MAX;
}
size_t
-TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
+TypeSystemClang::GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack) {
if (!type)
return 0;
clang::QualType qual_type = RemoveWrappingTypes(GetCanonicalQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
const clang::ClassTemplateSpecializationDecl *template_decl =
llvm::dyn_cast<clang::ClassTemplateSpecializationDecl>(
cxx_record_decl);
- if (template_decl)
- return template_decl->getTemplateArgs().size();
+ if (template_decl) {
+ const auto &template_arg_list = template_decl->getTemplateArgs();
+ size_t num_args = template_arg_list.size();
+ assert(num_args && "template specialization without any args");
+ if (expand_pack && num_args) {
+ const auto &pack = template_arg_list[num_args - 1];
+ if (pack.getKind() == clang::TemplateArgument::Pack)
+ num_args += pack.pack_size() - 1;
+ }
+ return num_args;
+ }
}
}
break;
default:
break;
}
return 0;
}
const clang::ClassTemplateSpecializationDecl *
TypeSystemClang::GetAsTemplateSpecialization(
lldb::opaque_compiler_type_t type) {
if (!type)
return nullptr;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
if (! GetCompleteType(type))
return nullptr;
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (!cxx_record_decl)
return nullptr;
return llvm::dyn_cast<clang::ClassTemplateSpecializationDecl>(
cxx_record_decl);
}
default:
return nullptr;
}
}
+const TemplateArgument *
+GetNthTemplateArgument(const clang::ClassTemplateSpecializationDecl *decl,
+ size_t idx, bool expand_pack) {
+ const auto &args = decl->getTemplateArgs();
+ const size_t args_size = args.size();
+
+ assert(args_size && "template specialization without any args");
+ if (!args_size)
+ return nullptr;
+
+ const size_t last_idx = args_size - 1;
+
+ // We're asked for a template argument that can't be a parameter pack, so
+ // return it without worrying about 'expand_pack'.
+ if (idx < last_idx)
+ return &args[idx];
+
+ // We're asked for the last template argument but we don't want/need to
+ // expand it.
+ if (!expand_pack || args[last_idx].getKind() != clang::TemplateArgument::Pack)
+ return idx >= args.size() ? nullptr : &args[idx];
+
+ // Index into the expanded pack.
+ // Note that 'idx' counts from the beginning of all template arguments
+ // (including the ones preceding the parameter pack).
+ const auto &pack = args[last_idx];
+ const size_t pack_idx = idx - last_idx;
+ const size_t pack_size = pack.pack_size();
+ assert(pack_idx < pack_size && "parameter pack index out-of-bounds");
+ return &pack.pack_elements()[pack_idx];
+}
+
lldb::TemplateArgumentKind
TypeSystemClang::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
- size_t arg_idx) {
+ size_t arg_idx, bool expand_pack) {
const clang::ClassTemplateSpecializationDecl *template_decl =
GetAsTemplateSpecialization(type);
- if (! template_decl || arg_idx >= template_decl->getTemplateArgs().size())
+ if (!template_decl)
+ return eTemplateArgumentKindNull;
+
+ const auto *arg = GetNthTemplateArgument(template_decl, arg_idx, expand_pack);
+ if (!arg)
return eTemplateArgumentKindNull;
- switch (template_decl->getTemplateArgs()[arg_idx].getKind()) {
+ switch (arg->getKind()) {
case clang::TemplateArgument::Null:
return eTemplateArgumentKindNull;
case clang::TemplateArgument::NullPtr:
return eTemplateArgumentKindNullPtr;
case clang::TemplateArgument::Type:
return eTemplateArgumentKindType;
case clang::TemplateArgument::Declaration:
return eTemplateArgumentKindDeclaration;
case clang::TemplateArgument::Integral:
return eTemplateArgumentKindIntegral;
case clang::TemplateArgument::Template:
return eTemplateArgumentKindTemplate;
case clang::TemplateArgument::TemplateExpansion:
return eTemplateArgumentKindTemplateExpansion;
case clang::TemplateArgument::Expression:
return eTemplateArgumentKindExpression;
case clang::TemplateArgument::Pack:
return eTemplateArgumentKindPack;
}
llvm_unreachable("Unhandled clang::TemplateArgument::ArgKind");
}
CompilerType
TypeSystemClang::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) {
+ size_t idx, bool expand_pack) {
const clang::ClassTemplateSpecializationDecl *template_decl =
GetAsTemplateSpecialization(type);
- if (!template_decl || idx >= template_decl->getTemplateArgs().size())
+ if (!template_decl)
return CompilerType();
- const clang::TemplateArgument &template_arg =
- template_decl->getTemplateArgs()[idx];
- if (template_arg.getKind() != clang::TemplateArgument::Type)
+ const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack);
+ if (!arg || arg->getKind() != clang::TemplateArgument::Type)
return CompilerType();
- return GetType(template_arg.getAsType());
+ return GetType(arg->getAsType());
}
Optional<CompilerType::IntegralTemplateArgument>
TypeSystemClang::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) {
+ size_t idx, bool expand_pack) {
const clang::ClassTemplateSpecializationDecl *template_decl =
GetAsTemplateSpecialization(type);
- if (! template_decl || idx >= template_decl->getTemplateArgs().size())
+ if (!template_decl)
return llvm::None;
- const clang::TemplateArgument &template_arg =
- template_decl->getTemplateArgs()[idx];
- if (template_arg.getKind() != clang::TemplateArgument::Integral)
+ const auto *arg = GetNthTemplateArgument(template_decl, idx, expand_pack);
+ if (!arg || arg->getKind() != clang::TemplateArgument::Integral)
return llvm::None;
- return {
- {template_arg.getAsIntegral(), GetType(template_arg.getIntegralType())}};
+ return {{arg->getAsIntegral(), GetType(arg->getIntegralType())}};
}
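// A minimal usage sketch (not part of the original change): iterating
// template arguments with the new 'expand_pack' flag. For a type such as
// std::tuple<int, double, bool>, passing expand_pack == true reports three
// Type arguments instead of a single Pack argument. The helper name is
// hypothetical.
static void ExampleVisitTemplateArgs(TypeSystemClang &ts,
                                     lldb::opaque_compiler_type_t type) {
  const size_t num_args =
      ts.GetNumTemplateArguments(type, /*expand_pack=*/true);
  for (size_t i = 0; i < num_args; ++i) {
    switch (ts.GetTemplateArgumentKind(type, i, /*expand_pack=*/true)) {
    case eTemplateArgumentKindType: {
      CompilerType arg_type =
          ts.GetTypeTemplateArgument(type, i, /*expand_pack=*/true);
      (void)arg_type; // a real caller would use the argument type here
      break;
    }
    case eTemplateArgumentKindIntegral: {
      auto integral_arg =
          ts.GetIntegralTemplateArgument(type, i, /*expand_pack=*/true);
      (void)integral_arg; // value and type of a non-type template argument
      break;
    }
    default:
      break;
    }
  }
}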
CompilerType TypeSystemClang::GetTypeForFormatters(void *type) {
if (type)
return ClangUtil::RemoveFastQualifiers(CompilerType(this, type));
return CompilerType();
}
clang::EnumDecl *TypeSystemClang::GetAsEnumDecl(const CompilerType &type) {
const clang::EnumType *enutype =
llvm::dyn_cast<clang::EnumType>(ClangUtil::GetCanonicalQualType(type));
if (enutype)
return enutype->getDecl();
return nullptr;
}
clang::RecordDecl *TypeSystemClang::GetAsRecordDecl(const CompilerType &type) {
const clang::RecordType *record_type =
llvm::dyn_cast<clang::RecordType>(ClangUtil::GetCanonicalQualType(type));
if (record_type)
return record_type->getDecl();
return nullptr;
}
clang::TagDecl *TypeSystemClang::GetAsTagDecl(const CompilerType &type) {
return ClangUtil::GetAsTagDecl(type);
}
clang::TypedefNameDecl *
TypeSystemClang::GetAsTypedefDecl(const CompilerType &type) {
const clang::TypedefType *typedef_type =
llvm::dyn_cast<clang::TypedefType>(ClangUtil::GetQualType(type));
if (typedef_type)
return typedef_type->getDecl();
return nullptr;
}
clang::CXXRecordDecl *
TypeSystemClang::GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type) {
return GetCanonicalQualType(type)->getAsCXXRecordDecl();
}
clang::ObjCInterfaceDecl *
TypeSystemClang::GetAsObjCInterfaceDecl(const CompilerType &type) {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(
ClangUtil::GetCanonicalQualType(type));
if (objc_class_type)
return objc_class_type->getInterface();
return nullptr;
}
clang::FieldDecl *TypeSystemClang::AddFieldToRecordType(
const CompilerType &type, llvm::StringRef name,
const CompilerType &field_clang_type, AccessType access,
uint32_t bitfield_bit_size) {
if (!type.IsValid() || !field_clang_type.IsValid())
return nullptr;
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return nullptr;
clang::ASTContext &clang_ast = ast->getASTContext();
clang::IdentifierInfo *ident = nullptr;
if (!name.empty())
ident = &clang_ast.Idents.get(name);
clang::FieldDecl *field = nullptr;
clang::Expr *bit_width = nullptr;
if (bitfield_bit_size != 0) {
llvm::APInt bitfield_bit_size_apint(clang_ast.getTypeSize(clang_ast.IntTy),
bitfield_bit_size);
bit_width = new (clang_ast)
clang::IntegerLiteral(clang_ast, bitfield_bit_size_apint,
clang_ast.IntTy, clang::SourceLocation());
}
clang::RecordDecl *record_decl = ast->GetAsRecordDecl(type);
if (record_decl) {
field = clang::FieldDecl::CreateDeserialized(clang_ast, 0);
field->setDeclContext(record_decl);
field->setDeclName(ident);
field->setType(ClangUtil::GetQualType(field_clang_type));
if (bit_width)
field->setBitWidth(bit_width);
SetMemberOwningModule(field, record_decl);
if (name.empty()) {
// Determine whether this field corresponds to an anonymous struct or
// union.
if (const clang::TagType *TagT =
field->getType()->getAs<clang::TagType>()) {
if (clang::RecordDecl *Rec =
llvm::dyn_cast<clang::RecordDecl>(TagT->getDecl()))
if (!Rec->getDeclName()) {
Rec->setAnonymousStructOrUnion(true);
field->setImplicit();
}
}
}
if (field) {
clang::AccessSpecifier access_specifier =
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access);
field->setAccess(access_specifier);
if (clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<CXXRecordDecl>(record_decl)) {
AddAccessSpecifierDecl(cxx_record_decl, ast->getASTContext(),
ast->GetCXXRecordDeclAccess(cxx_record_decl),
access_specifier);
ast->SetCXXRecordDeclAccess(cxx_record_decl, access_specifier);
}
record_decl->addDecl(field);
VerifyDecl(field);
}
} else {
clang::ObjCInterfaceDecl *class_interface_decl =
ast->GetAsObjCInterfaceDecl(type);
if (class_interface_decl) {
const bool is_synthesized = false;
field_clang_type.GetCompleteType();
auto *ivar = clang::ObjCIvarDecl::CreateDeserialized(clang_ast, 0);
ivar->setDeclContext(class_interface_decl);
ivar->setDeclName(ident);
ivar->setType(ClangUtil::GetQualType(field_clang_type));
ivar->setAccessControl(ConvertAccessTypeToObjCIvarAccessControl(access));
if (bit_width)
ivar->setBitWidth(bit_width);
ivar->setSynthesize(is_synthesized);
field = ivar;
SetMemberOwningModule(field, class_interface_decl);
if (field) {
class_interface_decl->addDecl(field);
VerifyDecl(field);
}
}
}
return field;
}
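// A minimal usage sketch (not part of the original source): adding a plain
// member and a 4-bit bitfield to a record that is still being defined. The
// helper name and field names are hypothetical; a bitfield_bit_size of 0
// produces an ordinary (non-bitfield) member.
static void ExampleAddFields(const CompilerType &record_type,
                             const CompilerType &int_type) {
  TypeSystemClang::AddFieldToRecordType(record_type, "value", int_type,
                                        lldb::eAccessPublic,
                                        /*bitfield_bit_size=*/0);
  TypeSystemClang::AddFieldToRecordType(record_type, "flags", int_type,
                                        lldb::eAccessPublic,
                                        /*bitfield_bit_size=*/4);
}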
void TypeSystemClang::BuildIndirectFields(const CompilerType &type) {
if (!type)
return;
TypeSystemClang *ast = llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return;
clang::RecordDecl *record_decl = ast->GetAsRecordDecl(type);
if (!record_decl)
return;
typedef llvm::SmallVector<clang::IndirectFieldDecl *, 1> IndirectFieldVector;
IndirectFieldVector indirect_fields;
clang::RecordDecl::field_iterator field_pos;
clang::RecordDecl::field_iterator field_end_pos = record_decl->field_end();
clang::RecordDecl::field_iterator last_field_pos = field_end_pos;
for (field_pos = record_decl->field_begin(); field_pos != field_end_pos;
last_field_pos = field_pos++) {
if (field_pos->isAnonymousStructOrUnion()) {
clang::QualType field_qual_type = field_pos->getType();
const clang::RecordType *field_record_type =
field_qual_type->getAs<clang::RecordType>();
if (!field_record_type)
continue;
clang::RecordDecl *field_record_decl = field_record_type->getDecl();
if (!field_record_decl)
continue;
for (clang::RecordDecl::decl_iterator
di = field_record_decl->decls_begin(),
de = field_record_decl->decls_end();
di != de; ++di) {
if (clang::FieldDecl *nested_field_decl =
llvm::dyn_cast<clang::FieldDecl>(*di)) {
clang::NamedDecl **chain =
new (ast->getASTContext()) clang::NamedDecl *[2];
chain[0] = *field_pos;
chain[1] = nested_field_decl;
clang::IndirectFieldDecl *indirect_field =
clang::IndirectFieldDecl::Create(
ast->getASTContext(), record_decl, clang::SourceLocation(),
nested_field_decl->getIdentifier(),
nested_field_decl->getType(), {chain, 2});
SetMemberOwningModule(indirect_field, record_decl);
indirect_field->setImplicit();
indirect_field->setAccess(TypeSystemClang::UnifyAccessSpecifiers(
field_pos->getAccess(), nested_field_decl->getAccess()));
indirect_fields.push_back(indirect_field);
} else if (clang::IndirectFieldDecl *nested_indirect_field_decl =
llvm::dyn_cast<clang::IndirectFieldDecl>(*di)) {
size_t nested_chain_size =
nested_indirect_field_decl->getChainingSize();
clang::NamedDecl **chain = new (ast->getASTContext())
clang::NamedDecl *[nested_chain_size + 1];
chain[0] = *field_pos;
int chain_index = 1;
for (clang::IndirectFieldDecl::chain_iterator
nci = nested_indirect_field_decl->chain_begin(),
nce = nested_indirect_field_decl->chain_end();
nci < nce; ++nci) {
chain[chain_index] = *nci;
chain_index++;
}
clang::IndirectFieldDecl *indirect_field =
clang::IndirectFieldDecl::Create(
ast->getASTContext(), record_decl, clang::SourceLocation(),
nested_indirect_field_decl->getIdentifier(),
nested_indirect_field_decl->getType(),
{chain, nested_chain_size + 1});
SetMemberOwningModule(indirect_field, record_decl);
indirect_field->setImplicit();
indirect_field->setAccess(TypeSystemClang::UnifyAccessSpecifiers(
field_pos->getAccess(), nested_indirect_field_decl->getAccess()));
indirect_fields.push_back(indirect_field);
}
}
}
}
// Check the last field to see if it has an incomplete array type as its
// last member and, if it does, tell the record decl about it.
if (last_field_pos != field_end_pos) {
if (last_field_pos->getType()->isIncompleteArrayType())
record_decl->setHasFlexibleArrayMember(true);
}
for (IndirectFieldVector::iterator ifi = indirect_fields.begin(),
ife = indirect_fields.end();
ifi < ife; ++ifi) {
record_decl->addDecl(*ifi);
}
}
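// Mark the record behind "type" as packed by attaching an implicit
// clang::PackedAttr to its RecordDecl.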
void TypeSystemClang::SetIsPacked(const CompilerType &type) {
if (type) {
TypeSystemClang *ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (ast) {
clang::RecordDecl *record_decl = GetAsRecordDecl(type);
if (!record_decl)
return;
record_decl->addAttr(
clang::PackedAttr::CreateImplicit(ast->getASTContext()));
}
}
}
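// Add a static data member with the given name, type, and access to the
// record behind "type"; returns the new VarDecl, or nullptr on failure.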
clang::VarDecl *TypeSystemClang::AddVariableToRecordType(
const CompilerType &type, llvm::StringRef name,
const CompilerType &var_type, AccessType access) {
if (!type.IsValid() || !var_type.IsValid())
return nullptr;
TypeSystemClang *ast = llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return nullptr;
clang::RecordDecl *record_decl = ast->GetAsRecordDecl(type);
if (!record_decl)
return nullptr;
clang::VarDecl *var_decl = nullptr;
clang::IdentifierInfo *ident = nullptr;
if (!name.empty())
ident = &ast->getASTContext().Idents.get(name);
var_decl = clang::VarDecl::CreateDeserialized(ast->getASTContext(), 0);
var_decl->setDeclContext(record_decl);
var_decl->setDeclName(ident);
var_decl->setType(ClangUtil::GetQualType(var_type));
var_decl->setStorageClass(clang::SC_Static);
SetMemberOwningModule(var_decl, record_decl);
var_decl->setAccess(
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access));
record_decl->addDecl(var_decl);
VerifyDecl(var_decl);
return var_decl;
}
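// Give an integral (or enumeration) typed variable an IntegerLiteral
// initializer; for enums the literal uses the underlying integer type.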
void TypeSystemClang::SetIntegerInitializerForVariable(
VarDecl *var, const llvm::APInt &init_value) {
assert(!var->hasInit() && "variable already initialized");
clang::ASTContext &ast = var->getASTContext();
QualType qt = var->getType();
assert(qt->isIntegralOrEnumerationType() &&
"only integer or enum types supported");
// If the variable is an enum type, take the underlying integer type as
// the type of the integer literal.
if (const EnumType *enum_type = qt->getAs<EnumType>()) {
const EnumDecl *enum_decl = enum_type->getDecl();
qt = enum_decl->getIntegerType();
}
var->setInit(IntegerLiteral::Create(ast, init_value, qt.getUnqualifiedType(),
SourceLocation()));
}
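// Give a floating-point typed variable a FloatingLiteral initializer.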
void TypeSystemClang::SetFloatingInitializerForVariable(
clang::VarDecl *var, const llvm::APFloat &init_value) {
assert(!var->hasInit() && "variable already initialized");
clang::ASTContext &ast = var->getASTContext();
QualType qt = var->getType();
assert(qt->isFloatingType() && "only floating point types supported");
var->setInit(FloatingLiteral::Create(
ast, init_value, true, qt.getUnqualifiedType(), SourceLocation()));
}
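// Add a method to a C++ record. Depending on the name and prototype this
// creates a destructor, constructor, conversion function, or plain
// CXXMethodDecl, fills in its parameters, and registers it with the record.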
clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType(
lldb::opaque_compiler_type_t type, llvm::StringRef name,
const char *mangled_name, const CompilerType &method_clang_type,
lldb::AccessType access, bool is_virtual, bool is_static, bool is_inline,
bool is_explicit, bool is_attr_used, bool is_artificial) {
if (!type || !method_clang_type.IsValid() || name.empty())
return nullptr;
clang::QualType record_qual_type(GetCanonicalQualType(type));
clang::CXXRecordDecl *cxx_record_decl =
record_qual_type->getAsCXXRecordDecl();
if (cxx_record_decl == nullptr)
return nullptr;
clang::QualType method_qual_type(ClangUtil::GetQualType(method_clang_type));
clang::CXXMethodDecl *cxx_method_decl = nullptr;
clang::DeclarationName decl_name(&getASTContext().Idents.get(name));
const clang::FunctionType *function_type =
llvm::dyn_cast<clang::FunctionType>(method_qual_type.getTypePtr());
if (function_type == nullptr)
return nullptr;
const clang::FunctionProtoType *method_function_prototype(
llvm::dyn_cast<clang::FunctionProtoType>(function_type));
if (!method_function_prototype)
return nullptr;
unsigned int num_params = method_function_prototype->getNumParams();
clang::CXXDestructorDecl *cxx_dtor_decl(nullptr);
clang::CXXConstructorDecl *cxx_ctor_decl(nullptr);
if (is_artificial)
return nullptr; // skip everything artificial
const clang::ExplicitSpecifier explicit_spec(
nullptr /*expr*/, is_explicit ? clang::ExplicitSpecKind::ResolvedTrue
: clang::ExplicitSpecKind::ResolvedFalse);
if (name.startswith("~")) {
cxx_dtor_decl =
clang::CXXDestructorDecl::CreateDeserialized(getASTContext(), 0);
cxx_dtor_decl->setDeclContext(cxx_record_decl);
cxx_dtor_decl->setDeclName(
getASTContext().DeclarationNames.getCXXDestructorName(
getASTContext().getCanonicalType(record_qual_type)));
cxx_dtor_decl->setType(method_qual_type);
cxx_dtor_decl->setImplicit(is_artificial);
cxx_dtor_decl->setInlineSpecified(is_inline);
cxx_dtor_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
cxx_method_decl = cxx_dtor_decl;
} else if (decl_name == cxx_record_decl->getDeclName()) {
cxx_ctor_decl = clang::CXXConstructorDecl::CreateDeserialized(
getASTContext(), 0, 0);
cxx_ctor_decl->setDeclContext(cxx_record_decl);
cxx_ctor_decl->setDeclName(
getASTContext().DeclarationNames.getCXXConstructorName(
getASTContext().getCanonicalType(record_qual_type)));
cxx_ctor_decl->setType(method_qual_type);
cxx_ctor_decl->setImplicit(is_artificial);
cxx_ctor_decl->setInlineSpecified(is_inline);
cxx_ctor_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
cxx_ctor_decl->setNumCtorInitializers(0);
cxx_ctor_decl->setExplicitSpecifier(explicit_spec);
cxx_method_decl = cxx_ctor_decl;
} else {
clang::StorageClass SC = is_static ? clang::SC_Static : clang::SC_None;
clang::OverloadedOperatorKind op_kind = clang::NUM_OVERLOADED_OPERATORS;
if (IsOperator(name, op_kind)) {
if (op_kind != clang::NUM_OVERLOADED_OPERATORS) {
// Check the number of operator parameters. Sometimes we have seen bad
// DWARF that doesn't correctly describe operators and if we try to
// create a method and add it to the class, clang will assert and
// crash, so we need to make sure things are acceptable.
const bool is_method = true;
if (!TypeSystemClang::CheckOverloadedOperatorKindParameterCount(
is_method, op_kind, num_params))
return nullptr;
cxx_method_decl =
clang::CXXMethodDecl::CreateDeserialized(getASTContext(), 0);
cxx_method_decl->setDeclContext(cxx_record_decl);
cxx_method_decl->setDeclName(
getASTContext().DeclarationNames.getCXXOperatorName(op_kind));
cxx_method_decl->setType(method_qual_type);
cxx_method_decl->setStorageClass(SC);
cxx_method_decl->setInlineSpecified(is_inline);
cxx_method_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
} else if (num_params == 0) {
// Conversion operators don't take params...
auto *cxx_conversion_decl =
clang::CXXConversionDecl::CreateDeserialized(getASTContext(), 0);
cxx_conversion_decl->setDeclContext(cxx_record_decl);
cxx_conversion_decl->setDeclName(
getASTContext().DeclarationNames.getCXXConversionFunctionName(
getASTContext().getCanonicalType(
function_type->getReturnType())));
cxx_conversion_decl->setType(method_qual_type);
cxx_conversion_decl->setInlineSpecified(is_inline);
cxx_conversion_decl->setExplicitSpecifier(explicit_spec);
cxx_conversion_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
cxx_method_decl = cxx_conversion_decl;
}
}
if (cxx_method_decl == nullptr) {
cxx_method_decl =
clang::CXXMethodDecl::CreateDeserialized(getASTContext(), 0);
cxx_method_decl->setDeclContext(cxx_record_decl);
cxx_method_decl->setDeclName(decl_name);
cxx_method_decl->setType(method_qual_type);
cxx_method_decl->setInlineSpecified(is_inline);
cxx_method_decl->setStorageClass(SC);
cxx_method_decl->setConstexprKind(ConstexprSpecKind::Unspecified);
}
}
SetMemberOwningModule(cxx_method_decl, cxx_record_decl);
clang::AccessSpecifier access_specifier =
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access);
cxx_method_decl->setAccess(access_specifier);
cxx_method_decl->setVirtualAsWritten(is_virtual);
if (is_attr_used)
cxx_method_decl->addAttr(clang::UsedAttr::CreateImplicit(getASTContext()));
if (mangled_name != nullptr) {
cxx_method_decl->addAttr(clang::AsmLabelAttr::CreateImplicit(
getASTContext(), mangled_name, /*literal=*/false));
}
// Populate the method decl with parameter decls
llvm::SmallVector<clang::ParmVarDecl *, 12> params;
for (unsigned param_index = 0; param_index < num_params; ++param_index) {
params.push_back(clang::ParmVarDecl::Create(
getASTContext(), cxx_method_decl, clang::SourceLocation(),
clang::SourceLocation(),
nullptr, // anonymous
method_function_prototype->getParamType(param_index), nullptr,
clang::SC_None, nullptr));
}
cxx_method_decl->setParams(llvm::ArrayRef<clang::ParmVarDecl *>(params));
AddAccessSpecifierDecl(cxx_record_decl, getASTContext(),
GetCXXRecordDeclAccess(cxx_record_decl),
access_specifier);
SetCXXRecordDeclAccess(cxx_record_decl, access_specifier);
cxx_record_decl->addDecl(cxx_method_decl);
// Sometimes the debug info will mention a constructor (default/copy/move),
// destructor, or assignment operator (copy/move) even though no version of
// it exists in the code. So we check whether the function was artificially
// generated and whether it is trivial. This lets the compiler/backend know
// that it can inline the IR for these when it needs to, and we avoid a
// "missing function" error when running expressions.
if (is_artificial) {
if (cxx_ctor_decl && ((cxx_ctor_decl->isDefaultConstructor() &&
cxx_record_decl->hasTrivialDefaultConstructor()) ||
(cxx_ctor_decl->isCopyConstructor() &&
cxx_record_decl->hasTrivialCopyConstructor()) ||
(cxx_ctor_decl->isMoveConstructor() &&
cxx_record_decl->hasTrivialMoveConstructor()))) {
cxx_ctor_decl->setDefaulted();
cxx_ctor_decl->setTrivial(true);
} else if (cxx_dtor_decl) {
if (cxx_record_decl->hasTrivialDestructor()) {
cxx_dtor_decl->setDefaulted();
cxx_dtor_decl->setTrivial(true);
}
} else if ((cxx_method_decl->isCopyAssignmentOperator() &&
cxx_record_decl->hasTrivialCopyAssignment()) ||
(cxx_method_decl->isMoveAssignmentOperator() &&
cxx_record_decl->hasTrivialMoveAssignment())) {
cxx_method_decl->setDefaulted();
cxx_method_decl->setTrivial(true);
}
}
VerifyDecl(cxx_method_decl);
return cxx_method_decl;
}
void TypeSystemClang::AddMethodOverridesForCXXRecordType(
lldb::opaque_compiler_type_t type) {
if (auto *record = GetAsCXXRecordDecl(type))
for (auto *method : record->methods())
addOverridesForMethod(method);
}
#pragma mark C++ Base Classes
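// Describe "type" as a (possibly virtual) base class with the given access;
// the resulting specifiers are handed to TransferBaseClasses() below.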
std::unique_ptr<clang::CXXBaseSpecifier>
TypeSystemClang::CreateBaseClassSpecifier(lldb::opaque_compiler_type_t type,
AccessType access, bool is_virtual,
bool base_of_class) {
if (!type)
return nullptr;
return std::make_unique<clang::CXXBaseSpecifier>(
clang::SourceRange(), is_virtual, base_of_class,
TypeSystemClang::ConvertAccessTypeToAccessSpecifier(access),
getASTContext().getTrivialTypeSourceInfo(GetQualType(type)),
clang::SourceLocation());
}
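// Install the given base class specifiers on a C++ record. A minimal usage
// sketch (assuming "ts" is this TypeSystemClang and "derived"/"base" are
// record types it created):
//   std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> bases;
//   bases.push_back(ts.CreateBaseClassSpecifier(
//       base.GetOpaqueQualType(), lldb::eAccessPublic,
//       /*is_virtual=*/false, /*base_of_class=*/false));
//   ts.TransferBaseClasses(derived.GetOpaqueQualType(), std::move(bases));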
bool TypeSystemClang::TransferBaseClasses(
lldb::opaque_compiler_type_t type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> bases) {
if (!type)
return false;
clang::CXXRecordDecl *cxx_record_decl = GetAsCXXRecordDecl(type);
if (!cxx_record_decl)
return false;
std::vector<clang::CXXBaseSpecifier *> raw_bases;
raw_bases.reserve(bases.size());
// Clang will make a copy of them, so it's ok that we pass pointers that we're
// about to destroy.
for (auto &b : bases)
raw_bases.push_back(b.get());
cxx_record_decl->setBases(raw_bases.data(), raw_bases.size());
return true;
}
bool TypeSystemClang::SetObjCSuperClass(
const CompilerType &type, const CompilerType &superclass_clang_type) {
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return false;
clang::ASTContext &clang_ast = ast->getASTContext();
if (type && superclass_clang_type.IsValid() &&
superclass_clang_type.GetTypeSystem() == type.GetTypeSystem()) {
clang::ObjCInterfaceDecl *class_interface_decl =
GetAsObjCInterfaceDecl(type);
clang::ObjCInterfaceDecl *super_interface_decl =
GetAsObjCInterfaceDecl(superclass_clang_type);
if (class_interface_decl && super_interface_decl) {
class_interface_decl->setSuperClass(clang_ast.getTrivialTypeSourceInfo(
clang_ast.getObjCInterfaceType(super_interface_decl)));
return true;
}
}
return false;
}
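// Add an Objective-C property to the interface behind "type", creating
// implicit getter/setter ObjCMethodDecls when the interface doesn't already
// declare them.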
bool TypeSystemClang::AddObjCClassProperty(
const CompilerType &type, const char *property_name,
const CompilerType &property_clang_type, clang::ObjCIvarDecl *ivar_decl,
const char *property_setter_name, const char *property_getter_name,
uint32_t property_attributes, ClangASTMetadata *metadata) {
if (!type || !property_clang_type.IsValid() || property_name == nullptr ||
property_name[0] == '\0')
return false;
TypeSystemClang *ast = llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return false;
clang::ASTContext &clang_ast = ast->getASTContext();
clang::ObjCInterfaceDecl *class_interface_decl = GetAsObjCInterfaceDecl(type);
if (!class_interface_decl)
return false;
CompilerType property_clang_type_to_access;
if (property_clang_type.IsValid())
property_clang_type_to_access = property_clang_type;
else if (ivar_decl)
property_clang_type_to_access = ast->GetType(ivar_decl->getType());
if (!class_interface_decl || !property_clang_type_to_access.IsValid())
return false;
clang::TypeSourceInfo *prop_type_source;
if (ivar_decl)
prop_type_source = clang_ast.getTrivialTypeSourceInfo(ivar_decl->getType());
else
prop_type_source = clang_ast.getTrivialTypeSourceInfo(
ClangUtil::GetQualType(property_clang_type));
clang::ObjCPropertyDecl *property_decl =
clang::ObjCPropertyDecl::CreateDeserialized(clang_ast, 0);
property_decl->setDeclContext(class_interface_decl);
property_decl->setDeclName(&clang_ast.Idents.get(property_name));
property_decl->setType(ivar_decl
? ivar_decl->getType()
: ClangUtil::GetQualType(property_clang_type),
prop_type_source);
SetMemberOwningModule(property_decl, class_interface_decl);
if (metadata)
ast->SetMetadata(property_decl, *metadata);
class_interface_decl->addDecl(property_decl);
clang::Selector setter_sel, getter_sel;
if (property_setter_name) {
std::string property_setter_no_colon(property_setter_name,
strlen(property_setter_name) - 1);
clang::IdentifierInfo *setter_ident =
&clang_ast.Idents.get(property_setter_no_colon);
setter_sel = clang_ast.Selectors.getSelector(1, &setter_ident);
} else if (!(property_attributes & DW_APPLE_PROPERTY_readonly)) {
std::string setter_sel_string("set");
setter_sel_string.push_back(::toupper(property_name[0]));
setter_sel_string.append(&property_name[1]);
clang::IdentifierInfo *setter_ident =
&clang_ast.Idents.get(setter_sel_string);
setter_sel = clang_ast.Selectors.getSelector(1, &setter_ident);
}
property_decl->setSetterName(setter_sel);
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_setter);
if (property_getter_name != nullptr) {
clang::IdentifierInfo *getter_ident =
&clang_ast.Idents.get(property_getter_name);
getter_sel = clang_ast.Selectors.getSelector(0, &getter_ident);
} else {
clang::IdentifierInfo *getter_ident = &clang_ast.Idents.get(property_name);
getter_sel = clang_ast.Selectors.getSelector(0, &getter_ident);
}
property_decl->setGetterName(getter_sel);
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_getter);
if (ivar_decl)
property_decl->setPropertyIvarDecl(ivar_decl);
if (property_attributes & DW_APPLE_PROPERTY_readonly)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_readonly);
if (property_attributes & DW_APPLE_PROPERTY_readwrite)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_readwrite);
if (property_attributes & DW_APPLE_PROPERTY_assign)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_assign);
if (property_attributes & DW_APPLE_PROPERTY_retain)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_retain);
if (property_attributes & DW_APPLE_PROPERTY_copy)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_copy);
if (property_attributes & DW_APPLE_PROPERTY_nonatomic)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_nonatomic);
if (property_attributes & ObjCPropertyAttribute::kind_nullability)
property_decl->setPropertyAttributes(
ObjCPropertyAttribute::kind_nullability);
if (property_attributes & ObjCPropertyAttribute::kind_null_resettable)
property_decl->setPropertyAttributes(
ObjCPropertyAttribute::kind_null_resettable);
if (property_attributes & ObjCPropertyAttribute::kind_class)
property_decl->setPropertyAttributes(ObjCPropertyAttribute::kind_class);
const bool isInstance =
(property_attributes & ObjCPropertyAttribute::kind_class) == 0;
clang::ObjCMethodDecl *getter = nullptr;
if (!getter_sel.isNull())
getter = isInstance ? class_interface_decl->lookupInstanceMethod(getter_sel)
: class_interface_decl->lookupClassMethod(getter_sel);
if (!getter_sel.isNull() && !getter) {
const bool isVariadic = false;
const bool isPropertyAccessor = true;
const bool isSynthesizedAccessorStub = false;
const bool isImplicitlyDeclared = true;
const bool isDefined = false;
const clang::ObjCMethodDecl::ImplementationControl impControl =
clang::ObjCMethodDecl::None;
const bool HasRelatedResultType = false;
getter = clang::ObjCMethodDecl::CreateDeserialized(clang_ast, 0);
getter->setDeclName(getter_sel);
getter->setReturnType(ClangUtil::GetQualType(property_clang_type_to_access));
getter->setDeclContext(class_interface_decl);
getter->setInstanceMethod(isInstance);
getter->setVariadic(isVariadic);
getter->setPropertyAccessor(isPropertyAccessor);
getter->setSynthesizedAccessorStub(isSynthesizedAccessorStub);
getter->setImplicit(isImplicitlyDeclared);
getter->setDefined(isDefined);
getter->setDeclImplementation(impControl);
getter->setRelatedResultType(HasRelatedResultType);
SetMemberOwningModule(getter, class_interface_decl);
if (metadata)
ast->SetMetadata(getter, *metadata);
getter->setMethodParams(clang_ast, llvm::ArrayRef<clang::ParmVarDecl *>(),
llvm::ArrayRef<clang::SourceLocation>());
class_interface_decl->addDecl(getter);
}
if (getter) {
getter->setPropertyAccessor(true);
property_decl->setGetterMethodDecl(getter);
}
clang::ObjCMethodDecl *setter = nullptr;
if (!setter_sel.isNull())
setter = isInstance ? class_interface_decl->lookupInstanceMethod(setter_sel)
: class_interface_decl->lookupClassMethod(setter_sel);
if (!setter_sel.isNull() && !setter) {
clang::QualType result_type = clang_ast.VoidTy;
const bool isVariadic = false;
const bool isPropertyAccessor = true;
const bool isSynthesizedAccessorStub = false;
const bool isImplicitlyDeclared = true;
const bool isDefined = false;
const clang::ObjCMethodDecl::ImplementationControl impControl =
clang::ObjCMethodDecl::None;
const bool HasRelatedResultType = false;
setter = clang::ObjCMethodDecl::CreateDeserialized(clang_ast, 0);
setter->setDeclName(setter_sel);
setter->setReturnType(result_type);
setter->setDeclContext(class_interface_decl);
setter->setInstanceMethod(isInstance);
setter->setVariadic(isVariadic);
setter->setPropertyAccessor(isPropertyAccessor);
setter->setSynthesizedAccessorStub(isSynthesizedAccessorStub);
setter->setImplicit(isImplicitlyDeclared);
setter->setDefined(isDefined);
setter->setDeclImplementation(impControl);
setter->setRelatedResultType(HasRelatedResultType);
SetMemberOwningModule(setter, class_interface_decl);
if (metadata)
ast->SetMetadata(setter, *metadata);
llvm::SmallVector<clang::ParmVarDecl *, 1> params;
params.push_back(clang::ParmVarDecl::Create(
clang_ast, setter, clang::SourceLocation(), clang::SourceLocation(),
nullptr, // anonymous
ClangUtil::GetQualType(property_clang_type_to_access), nullptr,
clang::SC_Auto, nullptr));
setter->setMethodParams(clang_ast,
llvm::ArrayRef<clang::ParmVarDecl *>(params),
llvm::ArrayRef<clang::SourceLocation>());
class_interface_decl->addDecl(setter);
}
if (setter) {
setter->setPropertyAccessor(true);
property_decl->setSetterMethodDecl(setter);
}
return true;
}
bool TypeSystemClang::IsObjCClassTypeAndHasIVars(const CompilerType &type,
bool check_superclass) {
clang::ObjCInterfaceDecl *class_interface_decl = GetAsObjCInterfaceDecl(type);
if (class_interface_decl)
return ObjCDeclHasIVars(class_interface_decl, check_superclass);
return false;
}
clang::ObjCMethodDecl *TypeSystemClang::AddMethodToObjCObjectType(
const CompilerType &type,
const char *name, // the full symbol name as seen in the symbol table
// (e.g., "-[NSString stringWithCString:]")
const CompilerType &method_clang_type, lldb::AccessType access,
bool is_artificial, bool is_variadic, bool is_objc_direct_call) {
if (!type || !method_clang_type.IsValid())
return nullptr;
clang::ObjCInterfaceDecl *class_interface_decl = GetAsObjCInterfaceDecl(type);
if (class_interface_decl == nullptr)
return nullptr;
TypeSystemClang *lldb_ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (lldb_ast == nullptr)
return nullptr;
clang::ASTContext &ast = lldb_ast->getASTContext();
const char *selector_start = ::strchr(name, ' ');
if (selector_start == nullptr)
return nullptr;
selector_start++;
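// Split the selector portion of the name into its colon-separated pieces;
// each piece that is followed by a ':' takes an argument.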
llvm::SmallVector<clang::IdentifierInfo *, 12> selector_idents;
size_t len = 0;
const char *start;
unsigned num_selectors_with_args = 0;
for (start = selector_start; start && *start != '\0' && *start != ']';
start += len) {
len = ::strcspn(start, ":]");
bool has_arg = (start[len] == ':');
if (has_arg)
++num_selectors_with_args;
selector_idents.push_back(&ast.Idents.get(llvm::StringRef(start, len)));
if (has_arg)
len += 1;
}
if (selector_idents.size() == 0)
return nullptr;
clang::Selector method_selector = ast.Selectors.getSelector(
num_selectors_with_args ? selector_idents.size() : 0,
selector_idents.data());
clang::QualType method_qual_type(ClangUtil::GetQualType(method_clang_type));
// Populate the method decl with parameter decls
const clang::Type *method_type(method_qual_type.getTypePtr());
if (method_type == nullptr)
return nullptr;
const clang::FunctionProtoType *method_function_prototype(
llvm::dyn_cast<clang::FunctionProtoType>(method_type));
if (!method_function_prototype)
return nullptr;
const bool isInstance = (name[0] == '-');
const bool isVariadic = is_variadic;
const bool isPropertyAccessor = false;
const bool isSynthesizedAccessorStub = false;
// Force this to true because we don't have source locations.
const bool isImplicitlyDeclared = true;
const bool isDefined = false;
const clang::ObjCMethodDecl::ImplementationControl impControl =
clang::ObjCMethodDecl::None;
const bool HasRelatedResultType = false;
const unsigned num_args = method_function_prototype->getNumParams();
if (num_args != num_selectors_with_args)
return nullptr; // some debug information is corrupt. We are not going to
// deal with it.
auto *objc_method_decl = clang::ObjCMethodDecl::CreateDeserialized(ast, 0);
objc_method_decl->setDeclName(method_selector);
objc_method_decl->setReturnType(method_function_prototype->getReturnType());
objc_method_decl->setDeclContext(
lldb_ast->GetDeclContextForType(ClangUtil::GetQualType(type)));
objc_method_decl->setInstanceMethod(isInstance);
objc_method_decl->setVariadic(isVariadic);
objc_method_decl->setPropertyAccessor(isPropertyAccessor);
objc_method_decl->setSynthesizedAccessorStub(isSynthesizedAccessorStub);
objc_method_decl->setImplicit(isImplicitlyDeclared);
objc_method_decl->setDefined(isDefined);
objc_method_decl->setDeclImplementation(impControl);
objc_method_decl->setRelatedResultType(HasRelatedResultType);
SetMemberOwningModule(objc_method_decl, class_interface_decl);
if (num_args > 0) {
llvm::SmallVector<clang::ParmVarDecl *, 12> params;
for (unsigned param_index = 0; param_index < num_args; ++param_index) {
params.push_back(clang::ParmVarDecl::Create(
ast, objc_method_decl, clang::SourceLocation(),
clang::SourceLocation(),
nullptr, // anonymous
method_function_prototype->getParamType(param_index), nullptr,
clang::SC_Auto, nullptr));
}
objc_method_decl->setMethodParams(
ast, llvm::ArrayRef<clang::ParmVarDecl *>(params),
llvm::ArrayRef<clang::SourceLocation>());
}
if (is_objc_direct_call) {
// Add the objc_direct attribute to the declaration we generate so that
// we generate a direct method call for this ObjCMethodDecl.
objc_method_decl->addAttr(
clang::ObjCDirectAttr::CreateImplicit(ast, SourceLocation()));
// Usually Sema creates the implicit parameters (e.g., self) when it
// parses the method. We don't have a parsing Sema when we build our own
// AST here, so we need to create these implicit parameters manually to
// make the direct-call code generation happy.
objc_method_decl->createImplicitParams(ast, class_interface_decl);
}
class_interface_decl->addDecl(objc_method_decl);
VerifyDecl(objc_method_decl);
return objc_method_decl;
}
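// Toggle whether the record/enum/interface behind "type" claims external
// lexical and visible storage, i.e. whether clang should consult the
// ExternalASTSource when it needs the members.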
bool TypeSystemClang::SetHasExternalStorage(lldb::opaque_compiler_type_t type,
bool has_extern) {
if (!type)
return false;
clang::QualType qual_type(RemoveWrappingTypes(GetCanonicalQualType(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl();
if (cxx_record_decl) {
cxx_record_decl->setHasExternalLexicalStorage(has_extern);
cxx_record_decl->setHasExternalVisibleStorage(has_extern);
return true;
}
} break;
case clang::Type::Enum: {
clang::EnumDecl *enum_decl =
llvm::cast<clang::EnumType>(qual_type)->getDecl();
if (enum_decl) {
enum_decl->setHasExternalLexicalStorage(has_extern);
enum_decl->setHasExternalVisibleStorage(has_extern);
return true;
}
} break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (class_interface_decl) {
class_interface_decl->setHasExternalLexicalStorage(has_extern);
class_interface_decl->setHasExternalVisibleStorage(has_extern);
return true;
}
}
} break;
default:
break;
}
return false;
}
#pragma mark TagDecl
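// Begin the definition of a tag type (struct/union/class/enum) or
// Objective-C interface so that members can be added to it.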
bool TypeSystemClang::StartTagDeclarationDefinition(const CompilerType &type) {
clang::QualType qual_type(ClangUtil::GetQualType(type));
if (!qual_type.isNull()) {
const clang::TagType *tag_type = qual_type->getAs<clang::TagType>();
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (tag_decl) {
tag_decl->startDefinition();
return true;
}
}
const clang::ObjCObjectType *object_type =
qual_type->getAs<clang::ObjCObjectType>();
if (object_type) {
clang::ObjCInterfaceDecl *interface_decl = object_type->getInterface();
if (interface_decl) {
interface_decl->startDefinition();
return true;
}
}
}
return false;
}
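// Finish a definition started with StartTagDeclarationDefinition(), doing
// the extra bookkeeping Sema would normally handle (deleting implicit copy
// operations when a move operation is user-declared, choosing an enum
// promotion type, etc.).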
bool TypeSystemClang::CompleteTagDeclarationDefinition(
const CompilerType &type) {
clang::QualType qual_type(ClangUtil::GetQualType(type));
if (qual_type.isNull())
return false;
TypeSystemClang *lldb_ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (lldb_ast == nullptr)
return false;
// Make sure we use the same methodology as
// TypeSystemClang::StartTagDeclarationDefinition() as to how we start/end
// the definition.
const clang::TagType *tag_type = qual_type->getAs<clang::TagType>();
if (tag_type) {
clang::TagDecl *tag_decl = tag_type->getDecl();
if (auto *cxx_record_decl = llvm::dyn_cast<CXXRecordDecl>(tag_decl)) {
// If we have a move constructor declared but no copy constructor we
// need to explicitly mark it as deleted. Usually Sema would do this for
// us in Sema::DeclareImplicitCopyConstructor but we don't have a Sema
// when building an AST from debug information.
// See also:
// C++11 [class.copy]p7, p18:
// If the class definition declares a move constructor or move assignment
// operator, an implicitly declared copy constructor or copy assignment
// operator is defined as deleted.
if (cxx_record_decl->hasUserDeclaredMoveConstructor() ||
cxx_record_decl->hasUserDeclaredMoveAssignment()) {
if (cxx_record_decl->needsImplicitCopyConstructor())
cxx_record_decl->setImplicitCopyConstructorIsDeleted();
if (cxx_record_decl->needsImplicitCopyAssignment())
cxx_record_decl->setImplicitCopyAssignmentIsDeleted();
}
if (!cxx_record_decl->isCompleteDefinition())
cxx_record_decl->completeDefinition();
cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true);
cxx_record_decl->setHasExternalLexicalStorage(false);
cxx_record_decl->setHasExternalVisibleStorage(false);
lldb_ast->SetCXXRecordDeclAccess(cxx_record_decl,
clang::AccessSpecifier::AS_none);
return true;
}
}
const clang::EnumType *enutype = qual_type->getAs<clang::EnumType>();
if (!enutype)
return false;
clang::EnumDecl *enum_decl = enutype->getDecl();
if (enum_decl->isCompleteDefinition())
return true;
clang::ASTContext &ast = lldb_ast->getASTContext();
// TODO: This really needs to be fixed.
QualType integer_type(enum_decl->getIntegerType());
if (!integer_type.isNull()) {
unsigned NumPositiveBits = 1;
unsigned NumNegativeBits = 0;
clang::QualType promotion_qual_type;
// If the enum integer type is less than an integer in bit width,
// then we must promote it to an integer size.
if (ast.getTypeSize(enum_decl->getIntegerType()) <
ast.getTypeSize(ast.IntTy)) {
if (enum_decl->getIntegerType()->isSignedIntegerType())
promotion_qual_type = ast.IntTy;
else
promotion_qual_type = ast.UnsignedIntTy;
} else
promotion_qual_type = enum_decl->getIntegerType();
enum_decl->completeDefinition(enum_decl->getIntegerType(),
promotion_qual_type, NumPositiveBits,
NumNegativeBits);
}
return true;
}
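// Add an enumerator with an explicit value to the enumeration behind
// "enum_type"; returns the new EnumConstantDecl, or nullptr on failure.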
clang::EnumConstantDecl *TypeSystemClang::AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
const llvm::APSInt &value) {
if (!enum_type || ConstString(name).IsEmpty())
return nullptr;
lldbassert(enum_type.GetTypeSystem() == static_cast<TypeSystem *>(this));
lldb::opaque_compiler_type_t enum_opaque_compiler_type =
enum_type.GetOpaqueQualType();
if (!enum_opaque_compiler_type)
return nullptr;
clang::QualType enum_qual_type(
GetCanonicalQualType(enum_opaque_compiler_type));
const clang::Type *clang_type = enum_qual_type.getTypePtr();
if (!clang_type)
return nullptr;
const clang::EnumType *enutype = llvm::dyn_cast<clang::EnumType>(clang_type);
if (!enutype)
return nullptr;
clang::EnumConstantDecl *enumerator_decl =
clang::EnumConstantDecl::CreateDeserialized(getASTContext(), 0);
enumerator_decl->setDeclContext(enutype->getDecl());
if (name && name[0])
enumerator_decl->setDeclName(&getASTContext().Idents.get(name));
enumerator_decl->setType(clang::QualType(enutype, 0));
enumerator_decl->setInitVal(value);
SetMemberOwningModule(enumerator_decl, enutype->getDecl());
enutype->getDecl()->addDecl(enumerator_decl);
VerifyDecl(enumerator_decl);
return enumerator_decl;
}
clang::EnumConstantDecl *TypeSystemClang::AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
int64_t enum_value, uint32_t enum_value_bit_size) {
CompilerType underlying_type = GetEnumerationIntegerType(enum_type);
bool is_signed = false;
underlying_type.IsIntegerType(is_signed);
llvm::APSInt value(enum_value_bit_size, is_signed);
value = enum_value;
return AddEnumerationValueToEnumerationType(enum_type, decl, name, value);
}
CompilerType TypeSystemClang::GetEnumerationIntegerType(CompilerType type) {
clang::QualType qt(ClangUtil::GetQualType(type));
const clang::Type *clang_type = qt.getTypePtrOrNull();
const auto *enum_type = llvm::dyn_cast_or_null<clang::EnumType>(clang_type);
if (!enum_type)
return CompilerType();
return GetType(enum_type->getDecl()->getIntegerType());
}
CompilerType
TypeSystemClang::CreateMemberPointerType(const CompilerType &type,
const CompilerType &pointee_type) {
if (type && pointee_type.IsValid() &&
type.GetTypeSystem() == pointee_type.GetTypeSystem()) {
TypeSystemClang *ast =
llvm::dyn_cast<TypeSystemClang>(type.GetTypeSystem());
if (!ast)
return CompilerType();
return ast->GetType(ast->getASTContext().getMemberPointerType(
ClangUtil::GetQualType(pointee_type),
ClangUtil::GetQualType(type).getTypePtr()));
}
return CompilerType();
}
// Dumping types
#define DEPTH_INCREMENT 2
#ifndef NDEBUG
LLVM_DUMP_METHOD void
TypeSystemClang::dump(lldb::opaque_compiler_type_t type) const {
if (!type)
return;
clang::QualType qual_type(GetQualType(type));
qual_type.dump();
}
#endif
void TypeSystemClang::Dump(llvm::raw_ostream &output) {
GetTranslationUnitDecl()->dump(output);
}
void TypeSystemClang::DumpFromSymbolFile(Stream &s,
llvm::StringRef symbol_name) {
SymbolFile *symfile = GetSymbolFile();
if (!symfile)
return;
lldb_private::TypeList type_list;
symfile->GetTypes(nullptr, eTypeClassAny, type_list);
size_t ntypes = type_list.GetSize();
for (size_t i = 0; i < ntypes; ++i) {
TypeSP type = type_list.GetTypeAtIndex(i);
if (!symbol_name.empty())
if (symbol_name != type->GetName().GetStringRef())
continue;
s << type->GetName().AsCString() << "\n";
CompilerType full_type = type->GetFullCompilerType();
if (clang::TagDecl *tag_decl = GetAsTagDecl(full_type)) {
tag_decl->dump(s.AsRawOstream());
continue;
}
if (clang::TypedefNameDecl *typedef_decl = GetAsTypedefDecl(full_type)) {
typedef_decl->dump(s.AsRawOstream());
continue;
}
if (auto *objc_obj = llvm::dyn_cast<clang::ObjCObjectType>(
ClangUtil::GetQualType(full_type).getTypePtr())) {
if (clang::ObjCInterfaceDecl *interface_decl = objc_obj->getInterface()) {
interface_decl->dump(s.AsRawOstream());
continue;
}
}
GetCanonicalQualType(full_type.GetOpaqueQualType())
.dump(s.AsRawOstream(), getASTContext());
}
}
void TypeSystemClang::DumpValue(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, Stream *s,
lldb::Format format, const lldb_private::DataExtractor &data,
lldb::offset_t data_byte_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth) {
if (!type)
return;
clang::QualType qual_type(GetQualType(type));
switch (qual_type->getTypeClass()) {
case clang::Type::Record:
if (GetCompleteType(type)) {
const clang::RecordType *record_type =
llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
assert(record_decl);
uint32_t field_bit_offset = 0;
uint32_t field_byte_offset = 0;
const clang::ASTRecordLayout &record_layout =
getASTContext().getASTRecordLayout(record_decl);
uint32_t child_idx = 0;
const clang::CXXRecordDecl *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl);
if (cxx_record_decl) {
// We might have base classes to print out first
clang::CXXRecordDecl::base_class_const_iterator base_class,
base_class_end;
for (base_class = cxx_record_decl->bases_begin(),
base_class_end = cxx_record_decl->bases_end();
base_class != base_class_end; ++base_class) {
const clang::CXXRecordDecl *base_class_decl =
llvm::cast<clang::CXXRecordDecl>(
base_class->getType()->getAs<clang::RecordType>()->getDecl());
// Skip empty base classes
if (!verbose && !TypeSystemClang::RecordHasFields(base_class_decl))
continue;
if (base_class->isVirtual())
field_bit_offset =
record_layout.getVBaseClassOffset(base_class_decl)
.getQuantity() *
8;
else
field_bit_offset = record_layout.getBaseClassOffset(base_class_decl)
.getQuantity() *
8;
field_byte_offset = field_bit_offset / 8;
assert(field_bit_offset % 8 == 0);
if (child_idx == 0)
s->PutChar('{');
else
s->PutChar(',');
clang::QualType base_class_qual_type = base_class->getType();
std::string base_class_type_name(base_class_qual_type.getAsString());
// Indent and print the base class type name
s->Format("\n{0}{1}", llvm::fmt_repeat(" ", depth + DEPTH_INCREMENT),
base_class_type_name);
clang::TypeInfo base_class_type_info =
getASTContext().getTypeInfo(base_class_qual_type);
// Dump the value of the member
CompilerType base_clang_type = GetType(base_class_qual_type);
base_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
base_clang_type
.GetFormat(), // The format with which to display the member
data, // Data buffer containing all bytes for this type
data_byte_offset + field_byte_offset, // Offset into "data" where
// to grab value from
base_class_type_info.Width / 8, // Size of this type in bytes
0, // Bitfield bit size
0, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable
// types
show_summary, // Boolean indicating if we should show a summary
// for the current type
verbose, // Verbose output?
depth + DEPTH_INCREMENT); // Scope depth for any types that have
// children
++child_idx;
}
}
uint32_t field_idx = 0;
clang::RecordDecl::field_iterator field, field_end;
for (field = record_decl->field_begin(),
field_end = record_decl->field_end();
field != field_end; ++field, ++field_idx, ++child_idx) {
// Print the starting squiggly bracket (if this is the first member) or
// comma (for member 2 and beyond) for the struct/union/class member.
if (child_idx == 0)
s->PutChar('{');
else
s->PutChar(',');
// Indent
s->Printf("\n%*s", depth + DEPTH_INCREMENT, "");
clang::QualType field_type = field->getType();
// Print the member type if requested
// Figure out the type byte size (field_type_info.first) and alignment
// (field_type_info.second) from the AST context.
clang::TypeInfo field_type_info =
getASTContext().getTypeInfo(field_type);
assert(field_idx < record_layout.getFieldCount());
// Figure out the field offset within the current struct/union/class
// type
field_bit_offset = record_layout.getFieldOffset(field_idx);
field_byte_offset = field_bit_offset / 8;
uint32_t field_bitfield_bit_size = 0;
uint32_t field_bitfield_bit_offset = 0;
if (FieldIsBitfield(*field, field_bitfield_bit_size))
field_bitfield_bit_offset = field_bit_offset % 8;
if (show_types) {
std::string field_type_name(field_type.getAsString());
if (field_bitfield_bit_size > 0)
s->Printf("(%s:%u) ", field_type_name.c_str(),
field_bitfield_bit_size);
else
s->Printf("(%s) ", field_type_name.c_str());
}
// Print the member name and equal sign
s->Printf("%s = ", field->getNameAsString().c_str());
// Dump the value of the member
CompilerType field_clang_type = GetType(field_type);
field_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
field_clang_type
.GetFormat(), // The format with which to display the member
data, // Data buffer containing all bytes for this type
data_byte_offset + field_byte_offset, // Offset into "data" where to
// grab value from
field_type_info.Width / 8, // Size of this type in bytes
field_bitfield_bit_size, // Bitfield bit size
field_bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable
// types
show_summary, // Boolean indicating if we should show a summary for
// the current type
verbose, // Verbose output?
depth + DEPTH_INCREMENT); // Scope depth for any types that have
// children
}
// Indent the trailing squiggly bracket
if (child_idx > 0)
s->Printf("\n%*s}", depth, "");
}
return;
case clang::Type::Enum:
if (GetCompleteType(type)) {
const clang::EnumType *enutype =
llvm::cast<clang::EnumType>(qual_type.getTypePtr());
const clang::EnumDecl *enum_decl = enutype->getDecl();
assert(enum_decl);
clang::EnumDecl::enumerator_iterator enum_pos, enum_end_pos;
lldb::offset_t offset = data_byte_offset;
const int64_t enum_value = data.GetMaxU64Bitfield(
&offset, data_byte_size, bitfield_bit_size, bitfield_bit_offset);
for (enum_pos = enum_decl->enumerator_begin(),
enum_end_pos = enum_decl->enumerator_end();
enum_pos != enum_end_pos; ++enum_pos) {
if (enum_pos->getInitVal() == enum_value) {
s->Printf("%s", enum_pos->getNameAsString().c_str());
return;
}
}
// If we have gotten here we didn't find the enumerator in the enum
// decl, so just print the integer.
s->Printf("%" PRIi64, enum_value);
}
return;
case clang::Type::ConstantArray: {
const clang::ConstantArrayType *array =
llvm::cast<clang::ConstantArrayType>(qual_type.getTypePtr());
bool is_array_of_characters = false;
clang::QualType element_qual_type = array->getElementType();
const clang::Type *canonical_type =
element_qual_type->getCanonicalTypeInternal().getTypePtr();
if (canonical_type)
is_array_of_characters = canonical_type->isCharType();
const uint64_t element_count = array->getSize().getLimitedValue();
clang::TypeInfo field_type_info =
getASTContext().getTypeInfo(element_qual_type);
uint32_t element_idx = 0;
uint32_t element_offset = 0;
uint64_t element_byte_size = field_type_info.Width / 8;
uint32_t element_stride = element_byte_size;
if (is_array_of_characters) {
s->PutChar('"');
DumpDataExtractor(data, s, data_byte_offset, lldb::eFormatChar,
element_byte_size, element_count, UINT32_MAX,
LLDB_INVALID_ADDRESS, 0, 0);
s->PutChar('"');
return;
} else {
CompilerType element_clang_type = GetType(element_qual_type);
lldb::Format element_format = element_clang_type.GetFormat();
for (element_idx = 0; element_idx < element_count; ++element_idx) {
// Print the starting squiggly bracket (if this is the first member) or
// comma (for member 2 and beyond) for the struct/union/class member.
if (element_idx == 0)
s->PutChar('{');
else
s->PutChar(',');
// Indent and print the index
s->Printf("\n%*s[%u] ", depth + DEPTH_INCREMENT, "", element_idx);
// Figure out the field offset within the current struct/union/class
// type
element_offset = element_idx * element_stride;
// Dump the value of the member
element_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
element_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset +
element_offset, // Offset into "data" where to grab value from
element_byte_size, // Size of this type in bytes
0, // Bitfield bit size
0, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable
// types
show_summary, // Boolean indicating if we should show a summary for
// the current type
verbose, // Verbose output?
depth + DEPTH_INCREMENT); // Scope depth for any types that have
// children
}
// Indent the trailing squiggly bracket
if (element_idx > 0)
s->Printf("\n%*s}", depth, "");
}
}
return;
case clang::Type::Typedef: {
clang::QualType typedef_qual_type =
llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType();
CompilerType typedef_clang_type = GetType(typedef_qual_type);
lldb::Format typedef_format = typedef_clang_type.GetFormat();
clang::TypeInfo typedef_type_info =
getASTContext().getTypeInfo(typedef_qual_type);
uint64_t typedef_byte_size = typedef_type_info.Width / 8;
return typedef_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
typedef_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
typedef_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
case clang::Type::Auto: {
clang::QualType elaborated_qual_type =
llvm::cast<clang::AutoType>(qual_type)->getDeducedType();
CompilerType elaborated_clang_type = GetType(elaborated_qual_type);
lldb::Format elaborated_format = elaborated_clang_type.GetFormat();
clang::TypeInfo elaborated_type_info =
getASTContext().getTypeInfo(elaborated_qual_type);
uint64_t elaborated_byte_size = elaborated_type_info.Width / 8;
return elaborated_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
elaborated_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
elaborated_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
case clang::Type::Elaborated: {
clang::QualType elaborated_qual_type =
llvm::cast<clang::ElaboratedType>(qual_type)->getNamedType();
CompilerType elaborated_clang_type = GetType(elaborated_qual_type);
lldb::Format elaborated_format = elaborated_clang_type.GetFormat();
clang::TypeInfo elaborated_type_info =
getASTContext().getTypeInfo(elaborated_qual_type);
uint64_t elaborated_byte_size = elaborated_type_info.Width / 8;
return elaborated_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
elaborated_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
elaborated_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
case clang::Type::Paren: {
clang::QualType desugar_qual_type =
llvm::cast<clang::ParenType>(qual_type)->desugar();
CompilerType desugar_clang_type = GetType(desugar_qual_type);
lldb::Format desugar_format = desugar_clang_type.GetFormat();
clang::TypeInfo desugar_type_info =
getASTContext().getTypeInfo(desugar_qual_type);
uint64_t desugar_byte_size = desugar_type_info.Width / 8;
return desugar_clang_type.DumpValue(
exe_ctx,
s, // Stream to dump to
desugar_format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
data_byte_offset, // Offset into "data" where to grab value from
desugar_byte_size, // Size of this type in bytes
bitfield_bit_size, // Bitfield bit size
bitfield_bit_offset, // Bitfield bit offset
show_types, // Boolean indicating if we should show the variable types
show_summary, // Boolean indicating if we should show a summary for the
// current type
verbose, // Verbose output?
depth); // Scope depth for any types that have children
} break;
default:
// We are down to a scalar type that we just need to display.
DumpDataExtractor(data, s, data_byte_offset, format, data_byte_size, 1,
UINT32_MAX, LLDB_INVALID_ADDRESS, bitfield_bit_size,
bitfield_bit_offset);
if (show_summary)
DumpSummary(type, exe_ctx, s, data, data_byte_offset, data_byte_size);
break;
}
}
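// Print an enum value either as a matching enumerator name, as a bitwise
// combination of enumerator names (when the enum looks like a set of
// flags), or as a plain integer when neither interpretation fits.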
static bool DumpEnumValue(const clang::QualType &qual_type, Stream *s,
const DataExtractor &data, lldb::offset_t byte_offset,
size_t byte_size, uint32_t bitfield_bit_offset,
uint32_t bitfield_bit_size) {
const clang::EnumType *enutype =
llvm::cast<clang::EnumType>(qual_type.getTypePtr());
const clang::EnumDecl *enum_decl = enutype->getDecl();
assert(enum_decl);
lldb::offset_t offset = byte_offset;
const uint64_t enum_svalue = data.GetMaxS64Bitfield(
&offset, byte_size, bitfield_bit_size, bitfield_bit_offset);
bool can_be_bitfield = true;
uint64_t covered_bits = 0;
int num_enumerators = 0;
// Try to find an exact match for the value.
// At the same time, we're applying a heuristic to determine whether we want
// to print this enum as a bitfield. We're likely dealing with a bitfield if
// every enumerator is either a one bit value or a superset of the previous
// enumerators. Also 0 doesn't make sense when the enumerators are used as
// flags.
for (auto *enumerator : enum_decl->enumerators()) {
uint64_t val = enumerator->getInitVal().getSExtValue();
val = llvm::SignExtend64(val, 8*byte_size);
if (llvm::countPopulation(val) != 1 && (val & ~covered_bits) != 0)
can_be_bitfield = false;
covered_bits |= val;
++num_enumerators;
if (val == enum_svalue) {
// Found an exact match, that's all we need to do.
s->PutCString(enumerator->getNameAsString());
return true;
}
}
// Unsigned values make more sense for flags.
offset = byte_offset;
const uint64_t enum_uvalue = data.GetMaxU64Bitfield(
&offset, byte_size, bitfield_bit_size, bitfield_bit_offset);
// No exact match, but we don't think this is a bitfield. Print the value as
// decimal.
if (!can_be_bitfield) {
if (qual_type->isSignedIntegerOrEnumerationType())
s->Printf("%" PRIi64, enum_svalue);
else
s->Printf("%" PRIu64, enum_uvalue);
return true;
}
uint64_t remaining_value = enum_uvalue;
std::vector<std::pair<uint64_t, llvm::StringRef>> values;
values.reserve(num_enumerators);
for (auto *enumerator : enum_decl->enumerators())
if (auto val = enumerator->getInitVal().getZExtValue())
values.emplace_back(val, enumerator->getName());
// Sort in descending order of population count so that in
// `enum {A, B, ALL = A|B }` we visit ALL first. Use a stable sort so that
// A | C where A is declared before C is displayed in this order.
std::stable_sort(values.begin(), values.end(), [](const auto &a, const auto &b) {
return llvm::countPopulation(a.first) > llvm::countPopulation(b.first);
});
for (const auto &val : values) {
if ((remaining_value & val.first) != val.first)
continue;
remaining_value &= ~val.first;
s->PutCString(val.second);
if (remaining_value)
s->PutCString(" | ");
}
// If there is a remainder that is not covered by the value, print it as hex.
if (remaining_value)
s->Printf("0x%" PRIx64, remaining_value);
return true;
}
bool TypeSystemClang::DumpTypeValue(
lldb::opaque_compiler_type_t type, Stream *s, lldb::Format format,
const lldb_private::DataExtractor &data, lldb::offset_t byte_offset,
size_t byte_size, uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) {
if (!type)
return false;
if (IsAggregateType(type)) {
return false;
} else {
clang::QualType qual_type(GetQualType(type));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
if (type_class == clang::Type::Elaborated) {
qual_type = llvm::cast<clang::ElaboratedType>(qual_type)->getNamedType();
return DumpTypeValue(qual_type.getAsOpaquePtr(), s, format, data, byte_offset, byte_size,
bitfield_bit_size, bitfield_bit_offset, exe_scope);
}
switch (type_class) {
case clang::Type::Typedef: {
clang::QualType typedef_qual_type =
llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getUnderlyingType();
CompilerType typedef_clang_type = GetType(typedef_qual_type);
if (format == eFormatDefault)
format = typedef_clang_type.GetFormat();
clang::TypeInfo typedef_type_info =
getASTContext().getTypeInfo(typedef_qual_type);
uint64_t typedef_byte_size = typedef_type_info.Width / 8;
return typedef_clang_type.DumpTypeValue(
s,
format, // The format with which to display the element
data, // Data buffer containing all bytes for this type
byte_offset, // Offset into "data" where to grab value from
typedef_byte_size, // Size of this type in bytes
bitfield_bit_size, // Size in bits of a bitfield value, if zero don't
// treat as a bitfield
bitfield_bit_offset, // Offset in bits of a bitfield value if
// bitfield_bit_size != 0
exe_scope);
} break;
case clang::Type::Enum:
// If our format is enum or default, show the enumeration value as its
// enumeration string value, else just display it as requested.
if ((format == eFormatEnum || format == eFormatDefault) &&
GetCompleteType(type))
return DumpEnumValue(qual_type, s, data, byte_offset, byte_size,
bitfield_bit_offset, bitfield_bit_size);
// The format was not enum; just fall through and dump the value as
// requested...
LLVM_FALLTHROUGH;
default:
// We are down to a scalar type that we just need to display.
{
uint32_t item_count = 1;
// For a few formats we might need to modify our size and count
// depending on how we are trying to display the value...
switch (format) {
default:
case eFormatBoolean:
case eFormatBinary:
case eFormatComplex:
case eFormatCString: // NULL terminated C strings
case eFormatDecimal:
case eFormatEnum:
case eFormatHex:
case eFormatHexUppercase:
case eFormatFloat:
case eFormatOctal:
case eFormatOSType:
case eFormatUnsigned:
case eFormatPointer:
case eFormatVectorOfChar:
case eFormatVectorOfSInt8:
case eFormatVectorOfUInt8:
case eFormatVectorOfSInt16:
case eFormatVectorOfUInt16:
case eFormatVectorOfSInt32:
case eFormatVectorOfUInt32:
case eFormatVectorOfSInt64:
case eFormatVectorOfUInt64:
case eFormatVectorOfFloat32:
case eFormatVectorOfFloat64:
case eFormatVectorOfUInt128:
break;
case eFormatChar:
case eFormatCharPrintable:
case eFormatCharArray:
case eFormatBytes:
case eFormatUnicode8:
case eFormatBytesWithASCII:
item_count = byte_size;
byte_size = 1;
break;
case eFormatUnicode16:
item_count = byte_size / 2;
byte_size = 2;
break;
case eFormatUnicode32:
item_count = byte_size / 4;
byte_size = 4;
break;
}
return DumpDataExtractor(data, s, byte_offset, format, byte_size,
item_count, UINT32_MAX, LLDB_INVALID_ADDRESS,
bitfield_bit_size, bitfield_bit_offset,
exe_scope);
}
break;
}
}
return false;
}
void TypeSystemClang::DumpSummary(lldb::opaque_compiler_type_t type,
ExecutionContext *exe_ctx, Stream *s,
const lldb_private::DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size) {
uint32_t length = 0;
if (IsCStringType(type, length)) {
if (exe_ctx) {
Process *process = exe_ctx->GetProcessPtr();
if (process) {
lldb::offset_t offset = data_byte_offset;
lldb::addr_t pointer_address = data.GetMaxU64(&offset, data_byte_size);
std::vector<uint8_t> buf;
if (length > 0)
buf.resize(length);
else
buf.resize(256);
DataExtractor cstr_data(&buf.front(), buf.size(),
process->GetByteOrder(), 4);
buf.back() = '\0';
size_t bytes_read;
size_t total_cstr_len = 0;
Status error;
while ((bytes_read = process->ReadMemory(pointer_address, &buf.front(),
buf.size(), error)) > 0) {
const size_t len = strlen((const char *)&buf.front());
if (len == 0)
break;
if (total_cstr_len == 0)
s->PutCString(" \"");
DumpDataExtractor(cstr_data, s, 0, lldb::eFormatChar, 1, len,
UINT32_MAX, LLDB_INVALID_ADDRESS, 0, 0);
total_cstr_len += len;
if (len < buf.size())
break;
pointer_address += total_cstr_len;
}
if (total_cstr_len > 0)
s->PutChar('"');
}
}
}
}
void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type,
lldb::DescriptionLevel level) {
StreamFile s(stdout, false);
DumpTypeDescription(type, &s, level);
CompilerType ct(this, type);
const clang::Type *clang_type = ClangUtil::GetQualType(ct).getTypePtr();
ClangASTMetadata *metadata = GetMetadata(clang_type);
if (metadata) {
metadata->Dump(&s);
}
}
void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type,
Stream *s,
lldb::DescriptionLevel level) {
if (type) {
clang::QualType qual_type =
RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef});
llvm::SmallVector<char, 1024> buf;
llvm::raw_svector_ostream llvm_ostrm(buf);
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
GetCompleteType(type);
auto *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type.getTypePtr());
assert(objc_class_type);
if (!objc_class_type)
break;
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
if (!class_interface_decl)
break;
if (level == eDescriptionLevelVerbose)
class_interface_decl->dump(llvm_ostrm);
else
class_interface_decl->print(llvm_ostrm,
getASTContext().getPrintingPolicy(),
s->GetIndentLevel());
} break;
case clang::Type::Typedef: {
auto *typedef_type = qual_type->getAs<clang::TypedefType>();
if (!typedef_type)
break;
const clang::TypedefNameDecl *typedef_decl = typedef_type->getDecl();
if (level == eDescriptionLevelVerbose)
typedef_decl->dump(llvm_ostrm);
else {
std::string clang_typedef_name(GetTypeNameForDecl(typedef_decl));
if (!clang_typedef_name.empty()) {
s->PutCString("typedef ");
s->PutCString(clang_typedef_name);
}
}
} break;
case clang::Type::Record: {
GetCompleteType(type);
auto *record_type = llvm::cast<clang::RecordType>(qual_type.getTypePtr());
const clang::RecordDecl *record_decl = record_type->getDecl();
if (level == eDescriptionLevelVerbose)
record_decl->dump(llvm_ostrm);
else {
if (auto *cxx_record_decl =
llvm::dyn_cast<clang::CXXRecordDecl>(record_decl))
cxx_record_decl->print(llvm_ostrm,
getASTContext().getPrintingPolicy(),
s->GetIndentLevel());
else
record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(),
s->GetIndentLevel());
}
} break;
default: {
if (auto *tag_type =
llvm::dyn_cast<clang::TagType>(qual_type.getTypePtr())) {
if (clang::TagDecl *tag_decl = tag_type->getDecl()) {
if (level == eDescriptionLevelVerbose)
tag_decl->dump(llvm_ostrm);
else
tag_decl->print(llvm_ostrm, 0);
}
} else {
if (level == eDescriptionLevelVerbose)
qual_type->dump(llvm_ostrm, getASTContext());
else {
std::string clang_type_name(qual_type.getAsString());
if (!clang_type_name.empty())
s->PutCString(clang_type_name);
}
}
}
}
if (buf.size() > 0) {
s->Write(buf.data(), buf.size());
}
}
}
void TypeSystemClang::DumpTypeName(const CompilerType &type) {
if (ClangUtil::IsClangType(type)) {
clang::QualType qual_type(
ClangUtil::GetCanonicalQualType(ClangUtil::RemoveFastQualifiers(type)));
const clang::Type::TypeClass type_class = qual_type->getTypeClass();
switch (type_class) {
case clang::Type::Record: {
const clang::CXXRecordDecl *cxx_record_decl =
qual_type->getAsCXXRecordDecl();
if (cxx_record_decl)
printf("class %s", cxx_record_decl->getName().str().c_str());
} break;
case clang::Type::Enum: {
clang::EnumDecl *enum_decl =
llvm::cast<clang::EnumType>(qual_type)->getDecl();
if (enum_decl) {
printf("enum %s", enum_decl->getName().str().c_str());
}
} break;
case clang::Type::ObjCObject:
case clang::Type::ObjCInterface: {
const clang::ObjCObjectType *objc_class_type =
llvm::dyn_cast<clang::ObjCObjectType>(qual_type);
if (objc_class_type) {
clang::ObjCInterfaceDecl *class_interface_decl =
objc_class_type->getInterface();
// We currently can't complete Objective-C types through the newly
// added ASTContext because it only supports TagDecl objects right
// now...
if (class_interface_decl)
printf("@class %s", class_interface_decl->getName().str().c_str());
}
} break;
case clang::Type::Typedef:
printf("typedef %s", llvm::cast<clang::TypedefType>(qual_type)
->getDecl()
->getName()
.str()
.c_str());
break;
case clang::Type::Auto:
printf("auto ");
return DumpTypeName(CompilerType(type.GetTypeSystem(),
llvm::cast<clang::AutoType>(qual_type)
->getDeducedType()
.getAsOpaquePtr()));
case clang::Type::Elaborated:
printf("elaborated ");
return DumpTypeName(CompilerType(
type.GetTypeSystem(), llvm::cast<clang::ElaboratedType>(qual_type)
->getNamedType()
.getAsOpaquePtr()));
case clang::Type::Paren:
printf("paren ");
return DumpTypeName(CompilerType(
type.GetTypeSystem(),
llvm::cast<clang::ParenType>(qual_type)->desugar().getAsOpaquePtr()));
default:
printf("TypeSystemClang::DumpTypeName() type_class = %u", type_class);
break;
}
}
}
clang::ClassTemplateDecl *TypeSystemClang::ParseClassTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, const char *parent_name, int tag_decl_kind,
const TypeSystemClang::TemplateParameterInfos &template_param_infos) {
if (template_param_infos.IsValid()) {
std::string template_basename(parent_name);
template_basename.erase(template_basename.find('<'));
return CreateClassTemplateDecl(decl_ctx, owning_module, access_type,
template_basename.c_str(), tag_decl_kind,
template_param_infos);
}
return nullptr;
}
void TypeSystemClang::CompleteTagDecl(clang::TagDecl *decl) {
SymbolFile *sym_file = GetSymbolFile();
if (sym_file) {
CompilerType clang_type = GetTypeForDecl(decl);
if (clang_type)
sym_file->CompleteType(clang_type);
}
}
void TypeSystemClang::CompleteObjCInterfaceDecl(
clang::ObjCInterfaceDecl *decl) {
SymbolFile *sym_file = GetSymbolFile();
if (sym_file) {
CompilerType clang_type = GetTypeForDecl(decl);
if (clang_type)
sym_file->CompleteType(clang_type);
}
}
DWARFASTParser *TypeSystemClang::GetDWARFParser() {
if (!m_dwarf_ast_parser_up)
m_dwarf_ast_parser_up = std::make_unique<DWARFASTParserClang>(*this);
return m_dwarf_ast_parser_up.get();
}
PDBASTParser *TypeSystemClang::GetPDBParser() {
if (!m_pdb_ast_parser_up)
m_pdb_ast_parser_up = std::make_unique<PDBASTParser>(*this);
return m_pdb_ast_parser_up.get();
}
bool TypeSystemClang::LayoutRecordType(
const clang::RecordDecl *record_decl, uint64_t &bit_size,
uint64_t &alignment,
llvm::DenseMap<const clang::FieldDecl *, uint64_t> &field_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&base_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&vbase_offsets) {
lldb_private::ClangASTImporter *importer = nullptr;
if (m_dwarf_ast_parser_up)
importer = &m_dwarf_ast_parser_up->GetClangASTImporter();
if (!importer && m_pdb_ast_parser_up)
importer = &m_pdb_ast_parser_up->GetClangASTImporter();
if (!importer)
return false;
return importer->LayoutRecordType(record_decl, bit_size, alignment,
field_offsets, base_offsets, vbase_offsets);
}
// CompilerDecl override functions
ConstString TypeSystemClang::DeclGetName(void *opaque_decl) {
if (opaque_decl) {
clang::NamedDecl *nd =
llvm::dyn_cast<NamedDecl>((clang::Decl *)opaque_decl);
if (nd != nullptr)
return ConstString(nd->getDeclName().getAsString());
}
return ConstString();
}
ConstString TypeSystemClang::DeclGetMangledName(void *opaque_decl) {
if (opaque_decl) {
clang::NamedDecl *nd =
llvm::dyn_cast<clang::NamedDecl>((clang::Decl *)opaque_decl);
if (nd != nullptr && !llvm::isa<clang::ObjCMethodDecl>(nd)) {
clang::MangleContext *mc = getMangleContext();
if (mc && mc->shouldMangleCXXName(nd)) {
llvm::SmallVector<char, 1024> buf;
llvm::raw_svector_ostream llvm_ostrm(buf);
if (llvm::isa<clang::CXXConstructorDecl>(nd)) {
mc->mangleName(
clang::GlobalDecl(llvm::dyn_cast<clang::CXXConstructorDecl>(nd),
Ctor_Complete),
llvm_ostrm);
} else if (llvm::isa<clang::CXXDestructorDecl>(nd)) {
mc->mangleName(
clang::GlobalDecl(llvm::dyn_cast<clang::CXXDestructorDecl>(nd),
Dtor_Complete),
llvm_ostrm);
} else {
mc->mangleName(nd, llvm_ostrm);
}
if (buf.size() > 0)
return ConstString(buf.data(), buf.size());
}
}
}
return ConstString();
}
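// Usage sketch (not part of this patch; the decl's origin is hypothetical):
// for a free function such as `void foo();`, an Itanium-ABI mangle context
// produces "_Z3foov" here, while an ObjCMethodDecl is skipped above because
// Objective-C methods are never C++-mangled.
//
//   CompilerDecl decl = ...; // a CompilerDecl backed by TypeSystemClang
//   ConstString mangled = decl.GetMangledName();
//   if (mangled)
//     printf("mangled: %s\n", mangled.GetCString());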
CompilerDeclContext TypeSystemClang::DeclGetDeclContext(void *opaque_decl) {
if (opaque_decl)
return CreateDeclContext(((clang::Decl *)opaque_decl)->getDeclContext());
return CompilerDeclContext();
}
CompilerType TypeSystemClang::DeclGetFunctionReturnType(void *opaque_decl) {
if (clang::FunctionDecl *func_decl =
llvm::dyn_cast<clang::FunctionDecl>((clang::Decl *)opaque_decl))
return GetType(func_decl->getReturnType());
if (clang::ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>((clang::Decl *)opaque_decl))
return GetType(objc_method->getReturnType());
else
return CompilerType();
}
size_t TypeSystemClang::DeclGetFunctionNumArguments(void *opaque_decl) {
if (clang::FunctionDecl *func_decl =
llvm::dyn_cast<clang::FunctionDecl>((clang::Decl *)opaque_decl))
return func_decl->param_size();
if (clang::ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>((clang::Decl *)opaque_decl))
return objc_method->param_size();
else
return 0;
}
CompilerType TypeSystemClang::DeclGetFunctionArgumentType(void *opaque_decl,
size_t idx) {
if (clang::FunctionDecl *func_decl =
llvm::dyn_cast<clang::FunctionDecl>((clang::Decl *)opaque_decl)) {
if (idx < func_decl->param_size()) {
ParmVarDecl *var_decl = func_decl->getParamDecl(idx);
if (var_decl)
return GetType(var_decl->getOriginalType());
}
} else if (clang::ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>(
(clang::Decl *)opaque_decl)) {
if (idx < objc_method->param_size())
return GetType(objc_method->parameters()[idx]->getOriginalType());
}
return CompilerType();
}
// CompilerDeclContext functions
std::vector<CompilerDecl> TypeSystemClang::DeclContextFindDeclByName(
void *opaque_decl_ctx, ConstString name, const bool ignore_using_decls) {
std::vector<CompilerDecl> found_decls;
SymbolFile *symbol_file = GetSymbolFile();
if (opaque_decl_ctx && symbol_file) {
DeclContext *root_decl_ctx = (DeclContext *)opaque_decl_ctx;
std::set<DeclContext *> searched;
std::multimap<DeclContext *, DeclContext *> search_queue;
for (clang::DeclContext *decl_context = root_decl_ctx;
decl_context != nullptr && found_decls.empty();
decl_context = decl_context->getParent()) {
search_queue.insert(std::make_pair(decl_context, decl_context));
for (auto it = search_queue.find(decl_context); it != search_queue.end();
it++) {
if (!searched.insert(it->second).second)
continue;
symbol_file->ParseDeclsForContext(
CreateDeclContext(it->second));
for (clang::Decl *child : it->second->decls()) {
if (clang::UsingDirectiveDecl *ud =
llvm::dyn_cast<clang::UsingDirectiveDecl>(child)) {
if (ignore_using_decls)
continue;
clang::DeclContext *from = ud->getCommonAncestor();
if (searched.find(ud->getNominatedNamespace()) == searched.end())
search_queue.insert(
std::make_pair(from, ud->getNominatedNamespace()));
} else if (clang::UsingDecl *ud =
llvm::dyn_cast<clang::UsingDecl>(child)) {
if (ignore_using_decls)
continue;
for (clang::UsingShadowDecl *usd : ud->shadows()) {
clang::Decl *target = usd->getTargetDecl();
if (clang::NamedDecl *nd =
llvm::dyn_cast<clang::NamedDecl>(target)) {
IdentifierInfo *ii = nd->getIdentifier();
if (ii != nullptr &&
ii->getName().equals(name.AsCString(nullptr)))
found_decls.push_back(GetCompilerDecl(nd));
}
}
} else if (clang::NamedDecl *nd =
llvm::dyn_cast<clang::NamedDecl>(child)) {
IdentifierInfo *ii = nd->getIdentifier();
if (ii != nullptr && ii->getName().equals(name.AsCString(nullptr)))
found_decls.push_back(GetCompilerDecl(nd));
}
}
}
}
}
return found_decls;
}
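// Illustrative input (a sketch, not from this patch): with the source below,
// looking up "foo" from bar()'s DeclContext walks the parent chain and, via
// the UsingDirectiveDecl handling above, also queues namespace ns, so ns::foo
// is found even though it isn't a direct child of the starting context.
//
//   namespace ns { void foo(); }
//   void bar() {
//     using namespace ns;
//     // DeclContextFindDeclByName(ctx_of_bar, "foo") -> { ns::foo }
//   }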
// Look for child_decl_ctx's lookup scope in frame_decl_ctx and its parents,
// and return the number of levels it took to find it, or
// LLDB_INVALID_DECL_LEVEL if not found. If the decl was imported via a using
// declaration, its name and/or type, if set, will be used to check that the
// decl found in the scope is a match.
//
// The optional name is required by languages (like C++) to handle using
// declarations like:
//
// void poo();
// namespace ns {
// void foo();
// void goo();
// }
// void bar() {
// using ns::foo;
// // CountDeclLevels returns 0 for 'foo', 1 for 'poo', and
// // LLDB_INVALID_DECL_LEVEL for 'goo'.
// }
//
// The optional type is useful in the case that there's a specific overload
// that we're looking for that might otherwise be shadowed, like:
//
// void foo(int);
// namespace ns {
// void foo();
// }
// void bar() {
// using ns::foo;
// // CountDeclLevels returns 0 for { 'foo', void() },
// // 1 for { 'foo', void(int) }, and
// // LLDB_INVALID_DECL_LEVEL for { 'foo', void(int, int) }.
// }
//
// NOTE: Because file statics are at the TranslationUnit level along with
// globals, a function at file scope will return the same level as a function
// at global scope. Ideally we'd like to treat the file scope as an additional
// scope just below the global scope. More work needs to be done to recognise
// this: if the decl we're trying to look up is static, we should compare its
// source file with that of the current scope and return a lower number for it.
uint32_t TypeSystemClang::CountDeclLevels(clang::DeclContext *frame_decl_ctx,
clang::DeclContext *child_decl_ctx,
ConstString *child_name,
CompilerType *child_type) {
SymbolFile *symbol_file = GetSymbolFile();
if (frame_decl_ctx && symbol_file) {
std::set<DeclContext *> searched;
std::multimap<DeclContext *, DeclContext *> search_queue;
// Get the lookup scope for the decl we're trying to find.
clang::DeclContext *parent_decl_ctx = child_decl_ctx->getParent();
// Look for it in our scope's decl context and its parents.
uint32_t level = 0;
for (clang::DeclContext *decl_ctx = frame_decl_ctx; decl_ctx != nullptr;
decl_ctx = decl_ctx->getParent()) {
if (!decl_ctx->isLookupContext())
continue;
if (decl_ctx == parent_decl_ctx)
// Found it!
return level;
search_queue.insert(std::make_pair(decl_ctx, decl_ctx));
for (auto it = search_queue.find(decl_ctx); it != search_queue.end();
it++) {
if (searched.find(it->second) != searched.end())
continue;
// Currently DWARF has one shared translation unit for all Decls at top
// level, so this would erroneously find using statements anywhere. So
// don't look at the top-level translation unit.
// TODO fix this and add a testcase that depends on it.
if (llvm::isa<clang::TranslationUnitDecl>(it->second))
continue;
searched.insert(it->second);
symbol_file->ParseDeclsForContext(
CreateDeclContext(it->second));
for (clang::Decl *child : it->second->decls()) {
if (clang::UsingDirectiveDecl *ud =
llvm::dyn_cast<clang::UsingDirectiveDecl>(child)) {
clang::DeclContext *ns = ud->getNominatedNamespace();
if (ns == parent_decl_ctx)
// Found it!
return level;
clang::DeclContext *from = ud->getCommonAncestor();
if (searched.find(ns) == searched.end())
search_queue.insert(std::make_pair(from, ns));
} else if (child_name) {
if (clang::UsingDecl *ud =
llvm::dyn_cast<clang::UsingDecl>(child)) {
for (clang::UsingShadowDecl *usd : ud->shadows()) {
clang::Decl *target = usd->getTargetDecl();
clang::NamedDecl *nd = llvm::dyn_cast<clang::NamedDecl>(target);
if (!nd)
continue;
// Check names.
IdentifierInfo *ii = nd->getIdentifier();
if (ii == nullptr ||
!ii->getName().equals(child_name->AsCString(nullptr)))
continue;
// Check types, if one was provided.
if (child_type) {
CompilerType clang_type = GetTypeForDecl(nd);
if (!AreTypesSame(clang_type, *child_type,
/*ignore_qualifiers=*/true))
continue;
}
// Found it!
return level;
}
}
}
}
}
++level;
}
}
return LLDB_INVALID_DECL_LEVEL;
}
ConstString TypeSystemClang::DeclContextGetName(void *opaque_decl_ctx) {
if (opaque_decl_ctx) {
clang::NamedDecl *named_decl =
llvm::dyn_cast<clang::NamedDecl>((clang::DeclContext *)opaque_decl_ctx);
if (named_decl)
return ConstString(named_decl->getName());
}
return ConstString();
}
ConstString
TypeSystemClang::DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) {
if (opaque_decl_ctx) {
clang::NamedDecl *named_decl =
llvm::dyn_cast<clang::NamedDecl>((clang::DeclContext *)opaque_decl_ctx);
if (named_decl)
return ConstString(GetTypeNameForDecl(named_decl));
}
return ConstString();
}
bool TypeSystemClang::DeclContextIsClassMethod(
void *opaque_decl_ctx, lldb::LanguageType *language_ptr,
bool *is_instance_method_ptr, ConstString *language_object_name_ptr) {
if (opaque_decl_ctx) {
clang::DeclContext *decl_ctx = (clang::DeclContext *)opaque_decl_ctx;
if (ObjCMethodDecl *objc_method =
llvm::dyn_cast<clang::ObjCMethodDecl>(decl_ctx)) {
if (is_instance_method_ptr)
*is_instance_method_ptr = objc_method->isInstanceMethod();
if (language_ptr)
*language_ptr = eLanguageTypeObjC;
if (language_object_name_ptr)
language_object_name_ptr->SetCString("self");
return true;
} else if (CXXMethodDecl *cxx_method =
llvm::dyn_cast<clang::CXXMethodDecl>(decl_ctx)) {
if (is_instance_method_ptr)
*is_instance_method_ptr = cxx_method->isInstance();
if (language_ptr)
*language_ptr = eLanguageTypeC_plus_plus;
if (language_object_name_ptr)
language_object_name_ptr->SetCString("this");
return true;
} else if (clang::FunctionDecl *function_decl =
llvm::dyn_cast<clang::FunctionDecl>(decl_ctx)) {
ClangASTMetadata *metadata = GetMetadata(function_decl);
if (metadata && metadata->HasObjectPtr()) {
if (is_instance_method_ptr)
*is_instance_method_ptr = true;
if (language_ptr)
*language_ptr = eLanguageTypeObjC;
if (language_object_name_ptr)
language_object_name_ptr->SetCString(metadata->GetObjectPtrName());
return true;
}
}
}
return false;
}
bool TypeSystemClang::DeclContextIsContainedInLookup(
void *opaque_decl_ctx, void *other_opaque_decl_ctx) {
auto *decl_ctx = (clang::DeclContext *)opaque_decl_ctx;
auto *other = (clang::DeclContext *)other_opaque_decl_ctx;
do {
// A decl context always includes its own contents in its lookup.
if (decl_ctx == other)
return true;
// If we have an inline namespace, then the lookup of the parent context
// also includes the inline namespace contents.
} while (other->isInlineNamespace() && (other = other->getParent()));
return false;
}
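// Example of the inline-namespace case this loop handles (a sketch): given
//
//   namespace ns { inline namespace v1 { struct S; } }
//
// a lookup in `ns` also covers `ns::v1`, so passing ns as opaque_decl_ctx and
// ns::v1 as other_opaque_decl_ctx returns true, while a non-inline nested
// namespace would return false.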
static bool IsClangDeclContext(const CompilerDeclContext &dc) {
return dc.IsValid() && isa<TypeSystemClang>(dc.GetTypeSystem());
}
clang::DeclContext *
TypeSystemClang::DeclContextGetAsDeclContext(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return (clang::DeclContext *)dc.GetOpaqueDeclContext();
return nullptr;
}
ObjCMethodDecl *
TypeSystemClang::DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::ObjCMethodDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
CXXMethodDecl *
TypeSystemClang::DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::CXXMethodDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
clang::FunctionDecl *
TypeSystemClang::DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::FunctionDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
clang::NamespaceDecl *
TypeSystemClang::DeclContextGetAsNamespaceDecl(const CompilerDeclContext &dc) {
if (IsClangDeclContext(dc))
return llvm::dyn_cast<clang::NamespaceDecl>(
(clang::DeclContext *)dc.GetOpaqueDeclContext());
return nullptr;
}
ClangASTMetadata *
TypeSystemClang::DeclContextGetMetaData(const CompilerDeclContext &dc,
const Decl *object) {
TypeSystemClang *ast = llvm::cast<TypeSystemClang>(dc.GetTypeSystem());
return ast->GetMetadata(object);
}
clang::ASTContext *
TypeSystemClang::DeclContextGetTypeSystemClang(const CompilerDeclContext &dc) {
TypeSystemClang *ast =
llvm::dyn_cast_or_null<TypeSystemClang>(dc.GetTypeSystem());
if (ast)
return &ast->getASTContext();
return nullptr;
}
namespace {
/// A specialized scratch AST used within ScratchTypeSystemClang.
/// These are the ASTs backing the different IsolatedASTKinds. They behave
/// like a normal ScratchTypeSystemClang but they don't own their own
/// persistent storage or target reference.
class SpecializedScratchAST : public TypeSystemClang {
public:
/// \param name The display name of the TypeSystemClang instance.
/// \param triple The triple used for the TypeSystemClang instance.
/// \param ast_source The ClangASTSource that should be used to complete
/// type information.
SpecializedScratchAST(llvm::StringRef name, llvm::Triple triple,
std::unique_ptr<ClangASTSource> ast_source)
: TypeSystemClang(name, triple),
m_scratch_ast_source_up(std::move(ast_source)) {
// Setup the ClangASTSource to complete this AST.
m_scratch_ast_source_up->InstallASTContext(*this);
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> proxy_ast_source(
m_scratch_ast_source_up->CreateProxy());
SetExternalSource(proxy_ast_source);
}
/// The ExternalASTSource that performs lookups and completes types.
std::unique_ptr<ClangASTSource> m_scratch_ast_source_up;
};
} // namespace
char ScratchTypeSystemClang::ID;
const llvm::NoneType ScratchTypeSystemClang::DefaultAST = llvm::None;
ScratchTypeSystemClang::ScratchTypeSystemClang(Target &target,
llvm::Triple triple)
: TypeSystemClang("scratch ASTContext", triple), m_triple(triple),
m_target_wp(target.shared_from_this()),
m_persistent_variables(
new ClangPersistentVariables(target.shared_from_this())) {
m_scratch_ast_source_up = CreateASTSource();
m_scratch_ast_source_up->InstallASTContext(*this);
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> proxy_ast_source(
m_scratch_ast_source_up->CreateProxy());
SetExternalSource(proxy_ast_source);
}
void ScratchTypeSystemClang::Finalize() {
TypeSystemClang::Finalize();
m_scratch_ast_source_up.reset();
}
TypeSystemClang *
ScratchTypeSystemClang::GetForTarget(Target &target,
llvm::Optional<IsolatedASTKind> ast_kind,
bool create_on_demand) {
auto type_system_or_err = target.GetScratchTypeSystemForLanguage(
lldb::eLanguageTypeC, create_on_demand);
if (auto err = type_system_or_err.takeError()) {
LLDB_LOG_ERROR(GetLog(LLDBLog::Target), std::move(err),
"Couldn't get scratch TypeSystemClang");
return nullptr;
}
ScratchTypeSystemClang &scratch_ast =
llvm::cast<ScratchTypeSystemClang>(type_system_or_err.get());
// If no dedicated sub-AST was requested, just return the main AST.
if (ast_kind == DefaultAST)
return &scratch_ast;
// Search the sub-ASTs.
return &scratch_ast.GetIsolatedAST(*ast_kind);
}
/// Returns a human-readable name that uniquely identifies the sub-AST kind.
static llvm::StringRef
GetNameForIsolatedASTKind(ScratchTypeSystemClang::IsolatedASTKind kind) {
switch (kind) {
case ScratchTypeSystemClang::IsolatedASTKind::CppModules:
return "C++ modules";
}
llvm_unreachable("Unimplemented IsolatedASTKind?");
}
void ScratchTypeSystemClang::Dump(llvm::raw_ostream &output) {
// First dump the main scratch AST.
output << "State of scratch Clang type system:\n";
TypeSystemClang::Dump(output);
// Now sort the isolated sub-ASTs.
typedef std::pair<IsolatedASTKey, TypeSystem *> KeyAndTS;
std::vector<KeyAndTS> sorted_typesystems;
for (const auto &a : m_isolated_asts)
sorted_typesystems.emplace_back(a.first, a.second.get());
llvm::stable_sort(sorted_typesystems, llvm::less_first());
// Dump each sub-AST too.
for (const auto &a : sorted_typesystems) {
IsolatedASTKind kind =
static_cast<ScratchTypeSystemClang::IsolatedASTKind>(a.first);
output << "State of scratch Clang type subsystem "
<< GetNameForIsolatedASTKind(kind) << ":\n";
a.second->Dump(output);
}
}
UserExpression *ScratchTypeSystemClang::GetUserExpression(
llvm::StringRef expr, llvm::StringRef prefix, lldb::LanguageType language,
Expression::ResultType desired_type,
const EvaluateExpressionOptions &options, ValueObject *ctx_obj) {
TargetSP target_sp = m_target_wp.lock();
if (!target_sp)
return nullptr;
return new ClangUserExpression(*target_sp.get(), expr, prefix, language,
desired_type, options, ctx_obj);
}
FunctionCaller *ScratchTypeSystemClang::GetFunctionCaller(
const CompilerType &return_type, const Address &function_address,
const ValueList &arg_value_list, const char *name) {
TargetSP target_sp = m_target_wp.lock();
if (!target_sp)
return nullptr;
Process *process = target_sp->GetProcessSP().get();
if (!process)
return nullptr;
return new ClangFunctionCaller(*process, return_type, function_address,
arg_value_list, name);
}
std::unique_ptr<UtilityFunction>
ScratchTypeSystemClang::CreateUtilityFunction(std::string text,
std::string name) {
TargetSP target_sp = m_target_wp.lock();
if (!target_sp)
return {};
return std::make_unique<ClangUtilityFunction>(
*target_sp.get(), std::move(text), std::move(name),
target_sp->GetDebugUtilityExpression());
}
PersistentExpressionState *
ScratchTypeSystemClang::GetPersistentExpressionState() {
return m_persistent_variables.get();
}
void ScratchTypeSystemClang::ForgetSource(ASTContext *src_ctx,
ClangASTImporter &importer) {
// Remove it as a source from the main AST.
importer.ForgetSource(&getASTContext(), src_ctx);
// Remove it as a source from all created sub-ASTs.
for (const auto &a : m_isolated_asts)
importer.ForgetSource(&a.second->getASTContext(), src_ctx);
}
std::unique_ptr<ClangASTSource> ScratchTypeSystemClang::CreateASTSource() {
return std::make_unique<ClangASTSource>(
m_target_wp.lock()->shared_from_this(),
m_persistent_variables->GetClangASTImporter());
}
static llvm::StringRef
GetSpecializedASTName(ScratchTypeSystemClang::IsolatedASTKind feature) {
switch (feature) {
case ScratchTypeSystemClang::IsolatedASTKind::CppModules:
return "scratch ASTContext for C++ module types";
}
llvm_unreachable("Unimplemented ASTFeature kind?");
}
TypeSystemClang &ScratchTypeSystemClang::GetIsolatedAST(
ScratchTypeSystemClang::IsolatedASTKind feature) {
auto found_ast = m_isolated_asts.find(feature);
if (found_ast != m_isolated_asts.end())
return *found_ast->second;
// Couldn't find the requested sub-AST, so create it now.
std::unique_ptr<TypeSystemClang> new_ast;
new_ast.reset(new SpecializedScratchAST(GetSpecializedASTName(feature),
m_triple, CreateASTSource()));
m_isolated_asts[feature] = std::move(new_ast);
return *m_isolated_asts[feature];
}
diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
index 24dbb71c8f4d..7f25a6df548f 100644
--- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
+++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h
@@ -1,1234 +1,1235 @@
//===-- TypeSystemClang.h ---------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_CLANG_TYPESYSTEMCLANG_H
#define LLDB_SOURCE_PLUGINS_TYPESYSTEM_CLANG_TYPESYSTEMCLANG_H
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTFwd.h"
#include "clang/AST/TemplateBase.h"
#include "clang/Basic/TargetInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallVector.h"
#include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
#include "lldb/Expression/ExpressionVariable.h"
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Target/Target.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/Flags.h"
#include "lldb/Utility/Log.h"
#include "lldb/lldb-enumerations.h"
class DWARFASTParserClang;
class PDBASTParser;
namespace clang {
class FileManager;
class HeaderSearch;
class ModuleMap;
} // namespace clang
namespace lldb_private {
class ClangASTMetadata;
class ClangASTSource;
class Declaration;
/// A Clang module ID.
class OptionalClangModuleID {
unsigned m_id = 0;
public:
OptionalClangModuleID() = default;
explicit OptionalClangModuleID(unsigned id) : m_id(id) {}
bool HasValue() const { return m_id != 0; }
unsigned GetValue() const { return m_id; }
};
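// Usage sketch: ID 0 is the sentinel for "no module", so a
// default-constructed OptionalClangModuleID reports no value, while any
// non-zero ID counts as present:
//
//   OptionalClangModuleID none;   // HasValue() == false
//   OptionalClangModuleID mod(1); // HasValue() == true, GetValue() == 1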
/// The implementation of lldb::Type's m_payload field for TypeSystemClang.
class TypePayloadClang {
/// The Layout is as follows:
/// \verbatim
/// bit 0..30 ... Owning Module ID.
/// bit 31 ...... IsCompleteObjCClass.
/// \endverbatim
Type::Payload m_payload = 0;
public:
TypePayloadClang() = default;
explicit TypePayloadClang(OptionalClangModuleID owning_module,
bool is_complete_objc_class = false);
explicit TypePayloadClang(uint32_t opaque_payload) : m_payload(opaque_payload) {}
operator Type::Payload() { return m_payload; }
static constexpr unsigned ObjCClassBit = 1 << 31;
bool IsCompleteObjCClass() { return Flags(m_payload).Test(ObjCClassBit); }
void SetIsCompleteObjCClass(bool is_complete_objc_class) {
m_payload = is_complete_objc_class ? Flags(m_payload).Set(ObjCClassBit)
: Flags(m_payload).Clear(ObjCClassBit);
}
OptionalClangModuleID GetOwningModule() {
return OptionalClangModuleID(Flags(m_payload).Clear(ObjCClassBit));
}
void SetOwningModule(OptionalClangModuleID id);
/// \}
};
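// A worked example of the payload layout above (a sketch, values
// hypothetical): owning-module IDs occupy bits 0..30 and bit 31 flags a
// complete Objective-C class, so a payload for module 5 with the flag set is
//
//   uint32_t payload = 5u | TypePayloadClang::ObjCClassBit; // 0x80000005
//
// and GetOwningModule() masks the flag bit back off to recover the ID 5.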
-
+
/// A TypeSystem implementation based on Clang.
///
/// This class uses a single clang::ASTContext as the backend for storing
/// its types and declarations. Every clang::ASTContext should also just have
/// a single associated TypeSystemClang instance that manages it.
///
/// The clang::ASTContext instance can either be created by TypeSystemClang
/// itself or it can adopt an existing clang::ASTContext (for example, when
/// it is necessary to provide a TypeSystem interface for an existing
/// clang::ASTContext that was created by clang::CompilerInstance).
class TypeSystemClang : public TypeSystem {
// LLVM RTTI support
static char ID;
public:
typedef void (*CompleteTagDeclCallback)(void *baton, clang::TagDecl *);
typedef void (*CompleteObjCInterfaceDeclCallback)(void *baton,
clang::ObjCInterfaceDecl *);
// llvm casting support
bool isA(const void *ClassID) const override { return ClassID == &ID; }
static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
/// Constructs a TypeSystemClang with an ASTContext using the given triple.
///
/// \param name The name for the TypeSystemClang (for logging purposes)
/// \param triple The llvm::Triple used for the ASTContext. The triple defines
/// certain characteristics of the ASTContext and its types
/// (e.g., whether certain primitive types exist or what their
/// signedness is).
explicit TypeSystemClang(llvm::StringRef name, llvm::Triple triple);
/// Constructs a TypeSystemClang that uses an existing ASTContext internally.
/// Useful when there is an existing ASTContext that was created by Clang.
///
/// \param name The name for the TypeSystemClang (for logging purposes)
/// \param existing_ctxt An existing ASTContext.
explicit TypeSystemClang(llvm::StringRef name,
clang::ASTContext &existing_ctxt);
~TypeSystemClang() override;
void Finalize() override;
// PluginInterface functions
llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); }
static llvm::StringRef GetPluginNameStatic() { return "clang"; }
static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language,
Module *module, Target *target);
static LanguageSet GetSupportedLanguagesForTypes();
static LanguageSet GetSupportedLanguagesForExpressions();
static void Initialize();
static void Terminate();
static TypeSystemClang *GetASTContext(clang::ASTContext *ast_ctx);
/// Returns the display name of this TypeSystemClang that indicates what
/// purpose it serves in LLDB. Used for example in logs.
llvm::StringRef getDisplayName() const { return m_display_name; }
/// Returns the clang::ASTContext instance managed by this TypeSystemClang.
clang::ASTContext &getASTContext();
clang::MangleContext *getMangleContext();
std::shared_ptr<clang::TargetOptions> &getTargetOptions();
clang::TargetInfo *getTargetInfo();
void setSema(clang::Sema *s);
clang::Sema *getSema() { return m_sema; }
const char *GetTargetTriple();
void SetExternalSource(
llvm::IntrusiveRefCntPtr<clang::ExternalASTSource> &ast_source_up);
bool GetCompleteDecl(clang::Decl *decl) {
return TypeSystemClang::GetCompleteDecl(&getASTContext(), decl);
}
static void DumpDeclHiearchy(clang::Decl *decl);
static void DumpDeclContextHiearchy(clang::DeclContext *decl_ctx);
static bool DeclsAreEquivalent(clang::Decl *lhs_decl, clang::Decl *rhs_decl);
static bool GetCompleteDecl(clang::ASTContext *ast, clang::Decl *decl);
void SetMetadataAsUserID(const clang::Decl *decl, lldb::user_id_t user_id);
void SetMetadataAsUserID(const clang::Type *type, lldb::user_id_t user_id);
void SetMetadata(const clang::Decl *object, ClangASTMetadata &meta_data);
void SetMetadata(const clang::Type *object, ClangASTMetadata &meta_data);
ClangASTMetadata *GetMetadata(const clang::Decl *object);
ClangASTMetadata *GetMetadata(const clang::Type *object);
void SetCXXRecordDeclAccess(const clang::CXXRecordDecl *object,
clang::AccessSpecifier access);
clang::AccessSpecifier
GetCXXRecordDeclAccess(const clang::CXXRecordDecl *object);
// Basic Types
CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding,
size_t bit_size) override;
CompilerType GetBasicType(lldb::BasicType type);
static lldb::BasicType GetBasicTypeEnumeration(ConstString name);
CompilerType
GetBuiltinTypeForDWARFEncodingAndBitSize(llvm::StringRef type_name,
uint32_t dw_ate, uint32_t bit_size);
CompilerType GetCStringType(bool is_const);
static clang::DeclContext *GetDeclContextForType(clang::QualType type);
static clang::DeclContext *GetDeclContextForType(const CompilerType &type);
uint32_t GetPointerByteSize() override;
clang::TranslationUnitDecl *GetTranslationUnitDecl() {
return getASTContext().getTranslationUnitDecl();
}
static bool AreTypesSame(CompilerType type1, CompilerType type2,
bool ignore_qualifiers = false);
/// Creates a CompilerType from the given QualType with the current
/// TypeSystemClang instance as the CompilerType's typesystem.
/// \param qt The QualType for a type that belongs to the ASTContext of this
/// TypeSystemClang.
/// \return The CompilerType representing the given QualType. If the
/// QualType's type pointer is a nullptr then the function returns an
/// invalid CompilerType.
CompilerType GetType(clang::QualType qt) {
if (qt.getTypePtrOrNull() == nullptr)
return CompilerType();
// Check that the type actually belongs to this TypeSystemClang.
assert(qt->getAsTagDecl() == nullptr ||
&qt->getAsTagDecl()->getASTContext() == &getASTContext());
return CompilerType(this, qt.getAsOpaquePtr());
}
CompilerType GetTypeForDecl(clang::NamedDecl *decl);
CompilerType GetTypeForDecl(clang::TagDecl *decl);
CompilerType GetTypeForDecl(clang::ObjCInterfaceDecl *objc_decl);
template <typename RecordDeclType>
CompilerType
GetTypeForIdentifier(ConstString type_name,
clang::DeclContext *decl_context = nullptr) {
CompilerType compiler_type;
if (type_name.GetLength()) {
clang::ASTContext &ast = getASTContext();
if (!decl_context)
decl_context = ast.getTranslationUnitDecl();
clang::IdentifierInfo &myIdent = ast.Idents.get(type_name.GetCString());
clang::DeclarationName myName =
ast.DeclarationNames.getIdentifier(&myIdent);
clang::DeclContext::lookup_result result = decl_context->lookup(myName);
if (!result.empty()) {
clang::NamedDecl *named_decl = *result.begin();
if (const RecordDeclType *record_decl =
llvm::dyn_cast<RecordDeclType>(named_decl))
compiler_type.SetCompilerType(
this, clang::QualType(record_decl->getTypeForDecl(), 0)
.getAsOpaquePtr());
}
}
return compiler_type;
}
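// Usage sketch (type name hypothetical; ts is a TypeSystemClang instance):
// look up a record type by name in the translation unit, with the template
// parameter selecting the expected decl kind:
//
//   CompilerType t = ts.GetTypeForIdentifier<clang::CXXRecordDecl>(
//       ConstString("MyStruct"));
//   if (t)
//     ... // "MyStruct" exists and is a C++ record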
CompilerType CreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed = false);
CompilerType GetOrCreateStructForIdentifier(
ConstString type_name,
const std::initializer_list<std::pair<const char *, CompilerType>>
&type_fields,
bool packed = false);
static bool IsOperator(llvm::StringRef name,
clang::OverloadedOperatorKind &op_kind);
// Structure, Unions, Classes
static clang::AccessSpecifier
ConvertAccessTypeToAccessSpecifier(lldb::AccessType access);
static clang::AccessSpecifier
UnifyAccessSpecifiers(clang::AccessSpecifier lhs, clang::AccessSpecifier rhs);
static uint32_t GetNumBaseClasses(const clang::CXXRecordDecl *cxx_record_decl,
bool omit_empty_base_classes);
/// Synthesize a clang::Module and return its ID or a default-constructed ID.
OptionalClangModuleID GetOrCreateClangModule(llvm::StringRef name,
OptionalClangModuleID parent,
bool is_framework = false,
bool is_explicit = false);
CompilerType CreateRecordType(clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
lldb::AccessType access_type,
llvm::StringRef name, int kind,
lldb::LanguageType language,
ClangASTMetadata *metadata = nullptr,
bool exports_symbols = false);
class TemplateParameterInfos {
public:
bool IsValid() const {
// Having a pack name but no packed args doesn't make sense, so mark
// these template parameters as invalid.
if (pack_name && !packed_args)
return false;
return args.size() == names.size() &&
(!packed_args || !packed_args->packed_args);
}
bool hasParameterPack() const { return static_cast<bool>(packed_args); }
llvm::SmallVector<const char *, 2> names;
llvm::SmallVector<clang::TemplateArgument, 2> args;
-
+
const char * pack_name = nullptr;
std::unique_ptr<TemplateParameterInfos> packed_args;
};
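// Sketch of filling TemplateParameterInfos for `template <typename T> class V`
// instantiated as V<int> (a minimal example; ast is the clang::ASTContext):
//
//   TypeSystemClang::TemplateParameterInfos infos;
//   infos.names.push_back("T");
//   infos.args.push_back(clang::TemplateArgument(ast.IntTy));
//
// IsValid() then holds because names and args have equal size and no
// parameter pack is involved.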
clang::FunctionTemplateDecl *CreateFunctionTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::FunctionDecl *func_decl, const TemplateParameterInfos &infos);
void CreateFunctionTemplateSpecializationInfo(
clang::FunctionDecl *func_decl, clang::FunctionTemplateDecl *Template,
const TemplateParameterInfos &infos);
clang::ClassTemplateDecl *CreateClassTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, llvm::StringRef class_name, int kind,
const TemplateParameterInfos &infos);
clang::TemplateTemplateParmDecl *
CreateTemplateTemplateParmDecl(const char *template_name);
clang::ClassTemplateSpecializationDecl *CreateClassTemplateSpecializationDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
clang::ClassTemplateDecl *class_template_decl, int kind,
const TemplateParameterInfos &infos);
CompilerType
CreateClassTemplateSpecializationType(clang::ClassTemplateSpecializationDecl *
class_template_specialization_decl);
static clang::DeclContext *
GetAsDeclContext(clang::FunctionDecl *function_decl);
static bool CheckOverloadedOperatorKindParameterCount(
bool is_method, clang::OverloadedOperatorKind op_kind,
uint32_t num_params);
bool FieldIsBitfield(clang::FieldDecl *field, uint32_t &bitfield_bit_size);
static bool RecordHasFields(const clang::RecordDecl *record_decl);
CompilerType CreateObjCClass(llvm::StringRef name,
clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
bool isForwardDecl, bool isInternal,
ClangASTMetadata *metadata = nullptr);
// Returns a mask containing bits from the TypeSystemClang::eTypeXXX
// enumerations
// Namespace Declarations
clang::NamespaceDecl *
GetUniqueNamespaceDeclaration(const char *name, clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
bool is_inline = false);
// Function Types
clang::FunctionDecl *CreateFunctionDeclaration(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
llvm::StringRef name, const CompilerType &function_Type,
clang::StorageClass storage, bool is_inline);
CompilerType CreateFunctionType(const CompilerType &result_type,
const CompilerType *args, unsigned num_args,
bool is_variadic, unsigned type_quals,
clang::CallingConv cc = clang::CC_C);
clang::ParmVarDecl *
CreateParameterDeclaration(clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
const char *name, const CompilerType &param_type,
int storage, bool add_decl = false);
void SetFunctionParameters(clang::FunctionDecl *function_decl,
llvm::ArrayRef<clang::ParmVarDecl *> params);
CompilerType CreateBlockPointerType(const CompilerType &function_type);
// Array Types
CompilerType CreateArrayType(const CompilerType &element_type,
size_t element_count, bool is_vector);
// Enumeration Types
CompilerType CreateEnumerationType(llvm::StringRef name,
clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
const Declaration &decl,
const CompilerType &integer_qual_type,
bool is_scoped);
// Integer type functions
CompilerType GetIntTypeFromBitSize(size_t bit_size, bool is_signed);
CompilerType GetPointerSizedIntType(bool is_signed);
// Floating point functions
static CompilerType GetFloatTypeFromBitSize(clang::ASTContext *ast,
size_t bit_size);
// TypeSystem methods
DWARFASTParser *GetDWARFParser() override;
PDBASTParser *GetPDBParser() override;
// TypeSystemClang callbacks for external source lookups.
void CompleteTagDecl(clang::TagDecl *);
void CompleteObjCInterfaceDecl(clang::ObjCInterfaceDecl *);
bool LayoutRecordType(
const clang::RecordDecl *record_decl, uint64_t &size, uint64_t &alignment,
llvm::DenseMap<const clang::FieldDecl *, uint64_t> &field_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&base_offsets,
llvm::DenseMap<const clang::CXXRecordDecl *, clang::CharUnits>
&vbase_offsets);
/// Creates a CompilerDecl from the given Decl with the current
/// TypeSystemClang instance as its typesystem.
/// The Decl has to come from the ASTContext of this
/// TypeSystemClang.
CompilerDecl GetCompilerDecl(clang::Decl *decl) {
assert(&decl->getASTContext() == &getASTContext() &&
"CreateCompilerDecl for Decl from wrong ASTContext?");
return CompilerDecl(this, decl);
}
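// Usage sketch (decl origin hypothetical): wrapping a decl created through
// this type system; the assert above fires in asserts builds if the decl
// belongs to a different ASTContext.
//
//   clang::FunctionDecl *fd = ...; // must come from this ASTContext
//   CompilerDecl cd = ts.GetCompilerDecl(fd);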
// CompilerDecl override functions
ConstString DeclGetName(void *opaque_decl) override;
ConstString DeclGetMangledName(void *opaque_decl) override;
CompilerDeclContext DeclGetDeclContext(void *opaque_decl) override;
CompilerType DeclGetFunctionReturnType(void *opaque_decl) override;
size_t DeclGetFunctionNumArguments(void *opaque_decl) override;
CompilerType DeclGetFunctionArgumentType(void *opaque_decl,
size_t arg_idx) override;
CompilerType GetTypeForDecl(void *opaque_decl) override;
// CompilerDeclContext override functions
/// Creates a CompilerDeclContext from the given DeclContext
/// with the current TypeSystemClang instance as its typesystem.
/// The DeclContext has to come from the ASTContext of this
/// TypeSystemClang.
CompilerDeclContext CreateDeclContext(clang::DeclContext *ctx);
/// Set the owning module for \p decl.
static void SetOwningModule(clang::Decl *decl,
OptionalClangModuleID owning_module);
std::vector<CompilerDecl>
DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name,
const bool ignore_using_decls) override;
ConstString DeclContextGetName(void *opaque_decl_ctx) override;
ConstString DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) override;
bool DeclContextIsClassMethod(void *opaque_decl_ctx,
lldb::LanguageType *language_ptr,
bool *is_instance_method_ptr,
ConstString *language_object_name_ptr) override;
bool DeclContextIsContainedInLookup(void *opaque_decl_ctx,
void *other_opaque_decl_ctx) override;
// Clang specific clang::DeclContext functions
static clang::DeclContext *
DeclContextGetAsDeclContext(const CompilerDeclContext &dc);
static clang::ObjCMethodDecl *
DeclContextGetAsObjCMethodDecl(const CompilerDeclContext &dc);
static clang::CXXMethodDecl *
DeclContextGetAsCXXMethodDecl(const CompilerDeclContext &dc);
static clang::FunctionDecl *
DeclContextGetAsFunctionDecl(const CompilerDeclContext &dc);
static clang::NamespaceDecl *
DeclContextGetAsNamespaceDecl(const CompilerDeclContext &dc);
static ClangASTMetadata *DeclContextGetMetaData(const CompilerDeclContext &dc,
const clang::Decl *object);
static clang::ASTContext *
DeclContextGetTypeSystemClang(const CompilerDeclContext &dc);
// Tests
#ifndef NDEBUG
bool Verify(lldb::opaque_compiler_type_t type) override;
#endif
-
+
bool IsArrayType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size,
bool *is_incomplete) override;
bool IsVectorType(lldb::opaque_compiler_type_t type,
CompilerType *element_type, uint64_t *size) override;
bool IsAggregateType(lldb::opaque_compiler_type_t type) override;
bool IsAnonymousType(lldb::opaque_compiler_type_t type) override;
bool IsBeingDefined(lldb::opaque_compiler_type_t type) override;
bool IsCharType(lldb::opaque_compiler_type_t type) override;
bool IsCompleteType(lldb::opaque_compiler_type_t type) override;
bool IsConst(lldb::opaque_compiler_type_t type) override;
bool IsCStringType(lldb::opaque_compiler_type_t type,
uint32_t &length) override;
static bool IsCXXClassType(const CompilerType &type);
bool IsDefined(lldb::opaque_compiler_type_t type) override;
bool IsFloatingPointType(lldb::opaque_compiler_type_t type, uint32_t &count,
bool &is_complex) override;
bool IsFunctionType(lldb::opaque_compiler_type_t type) override;
uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type,
CompilerType *base_type_ptr) override;
size_t
GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) override;
CompilerType GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type,
const size_t index) override;
bool IsFunctionPointerType(lldb::opaque_compiler_type_t type) override;
bool IsBlockPointerType(lldb::opaque_compiler_type_t type,
CompilerType *function_pointer_type_ptr) override;
bool IsIntegerType(lldb::opaque_compiler_type_t type,
bool &is_signed) override;
bool IsEnumerationType(lldb::opaque_compiler_type_t type,
bool &is_signed) override;
bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) override;
static bool IsObjCClassType(const CompilerType &type);
static bool IsObjCClassTypeAndHasIVars(const CompilerType &type,
bool check_superclass);
static bool IsObjCObjectOrInterfaceType(const CompilerType &type);
static bool IsObjCObjectPointerType(const CompilerType &type,
CompilerType *target_type = nullptr);
bool IsPolymorphicClass(lldb::opaque_compiler_type_t type) override;
static bool IsClassType(lldb::opaque_compiler_type_t type);
static bool IsEnumType(lldb::opaque_compiler_type_t type);
bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type,
CompilerType *target_type, // Can pass nullptr
bool check_cplusplus, bool check_objc) override;
bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) override;
bool IsPointerType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) override;
bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type) override;
bool IsReferenceType(lldb::opaque_compiler_type_t type,
CompilerType *pointee_type, bool *is_rvalue) override;
bool IsScalarType(lldb::opaque_compiler_type_t type) override;
bool IsTypedefType(lldb::opaque_compiler_type_t type) override;
bool IsVoidType(lldb::opaque_compiler_type_t type) override;
bool CanPassInRegisters(const CompilerType &type) override;
bool SupportsLanguage(lldb::LanguageType language) override;
static llvm::Optional<std::string> GetCXXClassName(const CompilerType &type);
// Type Completion
bool GetCompleteType(lldb::opaque_compiler_type_t type) override;
// Accessors
ConstString GetTypeName(lldb::opaque_compiler_type_t type) override;
ConstString GetDisplayTypeName(lldb::opaque_compiler_type_t type) override;
uint32_t GetTypeInfo(lldb::opaque_compiler_type_t type,
CompilerType *pointee_or_element_compiler_type) override;
lldb::LanguageType
GetMinimumLanguage(lldb::opaque_compiler_type_t type) override;
lldb::TypeClass GetTypeClass(lldb::opaque_compiler_type_t type) override;
unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) override;
// Creating related types
CompilerType GetArrayElementType(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) override;
CompilerType GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size) override;
CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) override;
// Returns -1 if this isn't a function or if the function doesn't have a
// prototype. Returns a value >= 0 if there is a prototype.
int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) override;
CompilerType GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) override;
CompilerType
GetFunctionReturnType(lldb::opaque_compiler_type_t type) override;
size_t GetNumMemberFunctions(lldb::opaque_compiler_type_t type) override;
TypeMemberFunctionImpl
GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type,
size_t idx) override;
CompilerType GetNonReferenceType(lldb::opaque_compiler_type_t type) override;
CompilerType GetPointeeType(lldb::opaque_compiler_type_t type) override;
CompilerType GetPointerType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetLValueReferenceType(lldb::opaque_compiler_type_t type) override;
CompilerType
GetRValueReferenceType(lldb::opaque_compiler_type_t type) override;
CompilerType GetAtomicType(lldb::opaque_compiler_type_t type) override;
CompilerType AddConstModifier(lldb::opaque_compiler_type_t type) override;
CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type) override;
CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type) override;
/// Using the current type, create a new typedef to that type using
/// "typedef_name" as the name and "decl_ctx" as the decl context.
/// \param opaque_payload is an opaque TypePayloadClang.
CompilerType CreateTypedef(lldb::opaque_compiler_type_t type,
const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t opaque_payload) override;
// If the current object represents a typedef type, get the underlying type
CompilerType GetTypedefedType(lldb::opaque_compiler_type_t type) override;
// Create related types using the current type's AST
CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) override;
// Exploring the type
const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) override;
llvm::Optional<uint64_t> GetByteSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) {
if (llvm::Optional<uint64_t> bit_size = GetBitSize(type, exe_scope))
return (*bit_size + 7) / 8;
return llvm::None;
}
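// Worked example of the rounding above: a 1-bit or 7-bit type reports
// (bits + 7) / 8 == 1 byte, a 9-bit type reports 2 bytes, and a type with no
// known bit size propagates llvm::None unchanged.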
llvm::Optional<uint64_t>
GetBitSize(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) override;
lldb::Encoding GetEncoding(lldb::opaque_compiler_type_t type,
uint64_t &count) override;
lldb::Format GetFormat(lldb::opaque_compiler_type_t type) override;
llvm::Optional<size_t>
GetTypeBitAlign(lldb::opaque_compiler_type_t type,
ExecutionContextScope *exe_scope) override;
uint32_t GetNumChildren(lldb::opaque_compiler_type_t type,
bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) override;
CompilerType GetBuiltinTypeByName(ConstString name) override;
lldb::BasicType
GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) override;
static lldb::BasicType
GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type,
ConstString name);
void ForEachEnumerator(
lldb::opaque_compiler_type_t type,
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) override;
uint32_t GetNumFields(lldb::opaque_compiler_type_t type) override;
CompilerType GetFieldAtIndex(lldb::opaque_compiler_type_t type, size_t idx,
std::string &name, uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) override;
uint32_t GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) override;
uint32_t GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) override;
CompilerType GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type,
size_t idx,
uint32_t *bit_offset_ptr) override;
CompilerType GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type,
size_t idx,
uint32_t *bit_offset_ptr) override;
static uint32_t GetNumPointeeChildren(clang::QualType type);
CompilerType GetChildCompilerTypeAtIndex(
lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx,
bool transparent_pointers, bool omit_empty_base_classes,
bool ignore_array_bounds, std::string &child_name,
uint32_t &child_byte_size, int32_t &child_byte_offset,
uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset,
bool &child_is_base_class, bool &child_is_deref_of_parent,
ValueObject *valobj, uint64_t &language_flags) override;
// Lookup a child given a name. This function will match base class names and
// member names in "clang_type" only, not descendants.
uint32_t GetIndexOfChildWithName(lldb::opaque_compiler_type_t type,
const char *name,
bool omit_empty_base_classes) override;
// Lookup a child member given a name. This function will match member names
// only and will descend into "clang_type" children in search of the first
// member in this class or any base class that matches "name".
// TODO: Return all matches for a given name by returning a
// vector<vector<uint32_t>>
// so we catch all names that match a given child name, not just the first.
size_t
GetIndexOfChildMemberWithName(lldb::opaque_compiler_type_t type,
const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) override;
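// Sketch of the index-path output (hypothetical types): for
//
//   struct Base { int b; };
//   struct Derived : Base { int d; };
//
// looking up "b" on Derived yields child_indexes == {0, 0} -- index 0 selects
// the Base subobject, then index 0 selects member b within it -- while "d"
// yields {1}.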
- size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override;
+ size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack) override;
lldb::TemplateArgumentKind
- GetTemplateArgumentKind(lldb::opaque_compiler_type_t type,
- size_t idx) override;
+ GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) override;
CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) override;
+ size_t idx, bool expand_pack) override;
llvm::Optional<CompilerType::IntegralTemplateArgument>
- GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type,
- size_t idx) override;
+ GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) override;
CompilerType GetTypeForFormatters(void *type) override;
#define LLDB_INVALID_DECL_LEVEL UINT32_MAX
// LLDB_INVALID_DECL_LEVEL is returned by CountDeclLevels if child_decl_ctx
// could not be found in decl_ctx.
uint32_t CountDeclLevels(clang::DeclContext *frame_decl_ctx,
clang::DeclContext *child_decl_ctx,
ConstString *child_name = nullptr,
CompilerType *child_type = nullptr);
// Modifying RecordType
static clang::FieldDecl *AddFieldToRecordType(const CompilerType &type,
llvm::StringRef name,
const CompilerType &field_type,
lldb::AccessType access,
uint32_t bitfield_bit_size);
static void BuildIndirectFields(const CompilerType &type);
static void SetIsPacked(const CompilerType &type);
static clang::VarDecl *AddVariableToRecordType(const CompilerType &type,
llvm::StringRef name,
const CompilerType &var_type,
lldb::AccessType access);
/// Initializes a variable with an integer value.
/// \param var The variable to initialize. Must not already have an
/// initializer and must have an integer or enum type.
/// \param init_value The integer value that the variable should be
/// initialized to. Has to match the bit width of the
/// variable type.
static void SetIntegerInitializerForVariable(clang::VarDecl *var,
const llvm::APInt &init_value);
/// Initializes a variable with a floating point value.
/// \param var The variable to initialize. Must not already have an
/// initializer and must have a floating point type.
/// \param init_value The float value that the variable should be
/// initialized to.
static void
SetFloatingInitializerForVariable(clang::VarDecl *var,
const llvm::APFloat &init_value);
clang::CXXMethodDecl *AddMethodToCXXRecordType(
lldb::opaque_compiler_type_t type, llvm::StringRef name,
const char *mangled_name, const CompilerType &method_type,
lldb::AccessType access, bool is_virtual, bool is_static, bool is_inline,
bool is_explicit, bool is_attr_used, bool is_artificial);
void AddMethodOverridesForCXXRecordType(lldb::opaque_compiler_type_t type);
// C++ Base Classes
std::unique_ptr<clang::CXXBaseSpecifier>
CreateBaseClassSpecifier(lldb::opaque_compiler_type_t type,
lldb::AccessType access, bool is_virtual,
bool base_of_class);
bool TransferBaseClasses(
lldb::opaque_compiler_type_t type,
std::vector<std::unique_ptr<clang::CXXBaseSpecifier>> bases);
static bool SetObjCSuperClass(const CompilerType &type,
const CompilerType &superclass_compiler_type);
static bool AddObjCClassProperty(const CompilerType &type,
const char *property_name,
const CompilerType &property_compiler_type,
clang::ObjCIvarDecl *ivar_decl,
const char *property_setter_name,
const char *property_getter_name,
uint32_t property_attributes,
ClangASTMetadata *metadata);
static clang::ObjCMethodDecl *AddMethodToObjCObjectType(
const CompilerType &type,
const char *name, // the full symbol name as seen in the symbol table
// (lldb::opaque_compiler_type_t type, "-[NString
// stringWithCString:]")
const CompilerType &method_compiler_type, lldb::AccessType access,
bool is_artificial, bool is_variadic, bool is_objc_direct_call);
static bool SetHasExternalStorage(lldb::opaque_compiler_type_t type,
bool has_extern);
// Tag Declarations
static bool StartTagDeclarationDefinition(const CompilerType &type);
static bool CompleteTagDeclarationDefinition(const CompilerType &type);
// Modifying Enumeration types
clang::EnumConstantDecl *AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
int64_t enum_value, uint32_t enum_value_bit_size);
clang::EnumConstantDecl *AddEnumerationValueToEnumerationType(
const CompilerType &enum_type, const Declaration &decl, const char *name,
const llvm::APSInt &value);
/// Returns the underlying integer type for an enum type. If the given type
/// is invalid or not an enum-type, the function returns an invalid
/// CompilerType.
CompilerType GetEnumerationIntegerType(CompilerType type);
// Pointers & References
// Call this function using the class type when you want to make a member
// pointer type to pointee_type.
static CompilerType CreateMemberPointerType(const CompilerType &type,
const CompilerType &pointee_type);
// Dumping types
#ifndef NDEBUG
/// Convenience LLVM-style dump method for use in the debugger only.
/// In contrast to the other \p Dump() methods this directly invokes
/// \p clang::QualType::dump().
LLVM_DUMP_METHOD void dump(lldb::opaque_compiler_type_t type) const override;
#endif
/// \see lldb_private::TypeSystem::Dump
void Dump(llvm::raw_ostream &output) override;
/// Dump clang AST types from the symbol file.
///
/// \param[in] s
/// A stream to send the dumped AST node(s) to.
/// \param[in] symbol_name
/// The name of the symbol to dump; if it is empty, dump all the symbols.
void DumpFromSymbolFile(Stream &s, llvm::StringRef symbol_name);
void DumpValue(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx,
Stream *s, lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
bool show_types, bool show_summary, bool verbose,
uint32_t depth) override;
bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size,
uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) override;
void DumpSummary(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx,
Stream *s, const DataExtractor &data,
lldb::offset_t data_offset, size_t data_byte_size) override;
void DumpTypeDescription(
lldb::opaque_compiler_type_t type,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override;
void DumpTypeDescription(
lldb::opaque_compiler_type_t type, Stream *s,
lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override;
static void DumpTypeName(const CompilerType &type);
static clang::EnumDecl *GetAsEnumDecl(const CompilerType &type);
static clang::RecordDecl *GetAsRecordDecl(const CompilerType &type);
static clang::TagDecl *GetAsTagDecl(const CompilerType &type);
static clang::TypedefNameDecl *GetAsTypedefDecl(const CompilerType &type);
static clang::CXXRecordDecl *
GetAsCXXRecordDecl(lldb::opaque_compiler_type_t type);
static clang::ObjCInterfaceDecl *
GetAsObjCInterfaceDecl(const CompilerType &type);
clang::ClassTemplateDecl *ParseClassTemplateDecl(
clang::DeclContext *decl_ctx, OptionalClangModuleID owning_module,
lldb::AccessType access_type, const char *parent_name, int tag_decl_kind,
const TypeSystemClang::TemplateParameterInfos &template_param_infos);
clang::BlockDecl *CreateBlockDeclaration(clang::DeclContext *ctx,
OptionalClangModuleID owning_module);
clang::UsingDirectiveDecl *
CreateUsingDirectiveDeclaration(clang::DeclContext *decl_ctx,
OptionalClangModuleID owning_module,
clang::NamespaceDecl *ns_decl);
clang::UsingDecl *CreateUsingDeclaration(clang::DeclContext *current_decl_ctx,
OptionalClangModuleID owning_module,
clang::NamedDecl *target);
clang::VarDecl *CreateVariableDeclaration(clang::DeclContext *decl_context,
OptionalClangModuleID owning_module,
const char *name,
clang::QualType type);
static lldb::opaque_compiler_type_t
GetOpaqueCompilerType(clang::ASTContext *ast, lldb::BasicType basic_type);
static clang::QualType GetQualType(lldb::opaque_compiler_type_t type) {
if (type)
return clang::QualType::getFromOpaquePtr(type);
return clang::QualType();
}
static clang::QualType
GetCanonicalQualType(lldb::opaque_compiler_type_t type) {
if (type)
return clang::QualType::getFromOpaquePtr(type).getCanonicalType();
return clang::QualType();
}
clang::DeclarationName
GetDeclarationName(llvm::StringRef name,
const CompilerType &function_clang_type);
clang::LangOptions *GetLangOpts() const {
return m_language_options_up.get();
}
clang::SourceManager *GetSourceMgr() const {
return m_source_manager_up.get();
}
private:
/// Returns the PrintingPolicy used when generating the internal type names.
/// These type names are mostly used for the formatter selection.
clang::PrintingPolicy GetTypePrintingPolicy();
/// Returns the internal type name for the given NamedDecl using the
/// type printing policy.
std::string GetTypeNameForDecl(const clang::NamedDecl *named_decl);
const clang::ClassTemplateSpecializationDecl *
GetAsTemplateSpecialization(lldb::opaque_compiler_type_t type);
// Classes that inherit from TypeSystemClang can see and modify these
std::string m_target_triple;
std::unique_ptr<clang::ASTContext> m_ast_up;
std::unique_ptr<clang::LangOptions> m_language_options_up;
std::unique_ptr<clang::FileManager> m_file_manager_up;
std::unique_ptr<clang::SourceManager> m_source_manager_up;
std::unique_ptr<clang::DiagnosticsEngine> m_diagnostics_engine_up;
std::unique_ptr<clang::DiagnosticConsumer> m_diagnostic_consumer_up;
std::shared_ptr<clang::TargetOptions> m_target_options_rp;
std::unique_ptr<clang::TargetInfo> m_target_info_up;
std::unique_ptr<clang::IdentifierTable> m_identifier_table_up;
std::unique_ptr<clang::SelectorTable> m_selector_table_up;
std::unique_ptr<clang::Builtin::Context> m_builtins_up;
std::unique_ptr<clang::HeaderSearch> m_header_search_up;
std::unique_ptr<clang::ModuleMap> m_module_map_up;
std::unique_ptr<DWARFASTParserClang> m_dwarf_ast_parser_up;
std::unique_ptr<PDBASTParser> m_pdb_ast_parser_up;
std::unique_ptr<clang::MangleContext> m_mangle_ctx_up;
uint32_t m_pointer_byte_size = 0;
bool m_ast_owned = false;
/// A string describing what this TypeSystemClang represents (e.g.,
/// AST for debug information, an expression, some other utility ClangAST).
/// Useful for logging and debugging.
std::string m_display_name;
typedef llvm::DenseMap<const clang::Decl *, ClangASTMetadata> DeclMetadataMap;
/// Maps Decls to their associated ClangASTMetadata.
DeclMetadataMap m_decl_metadata;
typedef llvm::DenseMap<const clang::Type *, ClangASTMetadata> TypeMetadataMap;
/// Maps Types to their associated ClangASTMetadata.
TypeMetadataMap m_type_metadata;
typedef llvm::DenseMap<const clang::CXXRecordDecl *, clang::AccessSpecifier>
CXXRecordDeclAccessMap;
/// Maps CXXRecordDecl to their most recent added method/field's
/// AccessSpecifier.
CXXRecordDeclAccessMap m_cxx_record_decl_access;
/// The Sema instance that is currently being used to build this ASTContext.
/// May be null if we are already done parsing this ASTContext or the
/// ASTContext wasn't created by parsing source code.
clang::Sema *m_sema = nullptr;
// For TypeSystemClang only
TypeSystemClang(const TypeSystemClang &);
const TypeSystemClang &operator=(const TypeSystemClang &);
/// Creates the internal ASTContext.
void CreateASTContext();
void SetTargetTriple(llvm::StringRef target_triple);
};
/// The TypeSystemClang instance used for the scratch ASTContext in a
/// lldb::Target.
class ScratchTypeSystemClang : public TypeSystemClang {
/// LLVM RTTI support
static char ID;
public:
ScratchTypeSystemClang(Target &target, llvm::Triple triple);
~ScratchTypeSystemClang() override = default;
void Finalize() override;
/// The different kinds of isolated ASTs within the scratch TypeSystem.
///
/// These ASTs are isolated from the main scratch AST and are each
/// dedicated to a special language option/feature that makes the contained
/// AST nodes incompatible with other AST nodes.
enum IsolatedASTKind {
/// The isolated AST for declarations/types from expressions that imported
/// type information from a C++ module. The templates from a C++ module
/// often conflict with the templates we generate from debug information,
/// so we put these types in their own AST.
CppModules
};
/// Alias for requesting the default scratch TypeSystemClang in GetForTarget.
// This isn't constexpr as gtest/llvm::Optional comparison logic is trying
// to get the address of this for pretty-printing.
static const llvm::NoneType DefaultAST;
/// Infers the appropriate sub-AST from Clang's LangOptions.
static llvm::Optional<IsolatedASTKind>
InferIsolatedASTKindFromLangOpts(const clang::LangOptions &l) {
// If modules are activated we want the dedicated C++ module AST.
// See IsolatedASTKind::CppModules for more info.
if (l.Modules)
return IsolatedASTKind::CppModules;
return DefaultAST;
}
/// Returns the scratch TypeSystemClang for the given target.
/// \param target The Target for which the scratch TypeSystemClang should be
/// returned.
/// \param ast_kind Allows requesting a specific sub-AST instead of the
/// default scratch AST. See also `IsolatedASTKind`.
/// \param create_on_demand Whether the scratch TypeSystemClang instance may
/// be created by this call if it doesn't exist yet. If it doesn't exist yet
/// and this parameter is false, this function returns nullptr.
/// \return The scratch type system of the target, or nullptr if an
/// error occurred.
static TypeSystemClang *
GetForTarget(Target &target,
llvm::Optional<IsolatedASTKind> ast_kind = DefaultAST,
bool create_on_demand = true);
/// Returns the scratch TypeSystemClang for the given target. The returned
/// TypeSystemClang will be the scratch AST or a sub-AST, depending on which
/// fits best to the passed LangOptions.
/// \param target The Target for which the scratch TypeSystemClang should be
/// returned.
/// \param lang_opts The LangOptions of a clang ASTContext that the caller
/// wants to export type information from. This is used to
/// find the best matching sub-AST that will be returned.
static TypeSystemClang *GetForTarget(Target &target,
const clang::LangOptions &lang_opts) {
return GetForTarget(target, InferIsolatedASTKindFromLangOpts(lang_opts));
}
/// \see lldb_private::TypeSystem::Dump
void Dump(llvm::raw_ostream &output) override;
UserExpression *
GetUserExpression(llvm::StringRef expr, llvm::StringRef prefix,
lldb::LanguageType language,
Expression::ResultType desired_type,
const EvaluateExpressionOptions &options,
ValueObject *ctx_obj) override;
FunctionCaller *GetFunctionCaller(const CompilerType &return_type,
const Address &function_address,
const ValueList &arg_value_list,
const char *name) override;
std::unique_ptr<UtilityFunction>
CreateUtilityFunction(std::string text, std::string name) override;
PersistentExpressionState *GetPersistentExpressionState() override;
/// Unregisters the given ASTContext as a source from the scratch AST (and
/// all sub-ASTs).
/// \see ClangASTImporter::ForgetSource
void ForgetSource(clang::ASTContext *src_ctx, ClangASTImporter &importer);
// llvm casting support
bool isA(const void *ClassID) const override {
return ClassID == &ID || TypeSystemClang::isA(ClassID);
}
static bool classof(const TypeSystem *ts) { return ts->isA(&ID); }
private:
std::unique_ptr<ClangASTSource> CreateASTSource();
/// Returns the requested sub-AST.
/// Will lazily create the sub-AST if it hasn't been created before.
TypeSystemClang &GetIsolatedAST(IsolatedASTKind feature);
/// The target triple.
/// This was potentially adjusted and might not be identical to the triple
/// of `m_target_wp`.
llvm::Triple m_triple;
lldb::TargetWP m_target_wp;
/// The persistent variables associated with this process for the expression
/// parser.
std::unique_ptr<ClangPersistentVariables> m_persistent_variables;
/// The ExternalASTSource that performs lookups and completes minimally
/// imported types.
std::unique_ptr<ClangASTSource> m_scratch_ast_source_up;
// FIXME: GCC 5.x doesn't support enum as map keys.
typedef int IsolatedASTKey;
/// Map from IsolatedASTKind to their actual TypeSystemClang instance.
/// This map is lazily filled with sub-ASTs and should be accessed via
/// `GetSubAST` (which lazily fills this map).
std::unordered_map<IsolatedASTKey, std::unique_ptr<TypeSystemClang>>
m_isolated_asts;
};
} // namespace lldb_private
#endif // LLDB_SOURCE_PLUGINS_TYPESYSTEM_CLANG_TYPESYSTEMCLANG_H
diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp
index ac98352c235e..bef456583687 100644
--- a/lldb/source/Symbol/CompilerType.cpp
+++ b/lldb/source/Symbol/CompilerType.cpp
@@ -1,896 +1,898 @@
//===-- CompilerType.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Core/Debugger.h"
#include "lldb/Core/StreamFile.h"
#include "lldb/Symbol/Type.h"
#include "lldb/Target/ExecutionContext.h"
#include "lldb/Target/Process.h"
#include "lldb/Utility/ConstString.h"
#include "lldb/Utility/DataBufferHeap.h"
#include "lldb/Utility/DataExtractor.h"
#include "lldb/Utility/Scalar.h"
#include "lldb/Utility/Stream.h"
#include "lldb/Utility/StreamString.h"
#include <iterator>
#include <mutex>
using namespace lldb;
using namespace lldb_private;
// Tests
bool CompilerType::IsAggregateType() const {
if (IsValid())
return m_type_system->IsAggregateType(m_type);
return false;
}
bool CompilerType::IsAnonymousType() const {
if (IsValid())
return m_type_system->IsAnonymousType(m_type);
return false;
}
bool CompilerType::IsScopedEnumerationType() const {
if (IsValid())
return m_type_system->IsScopedEnumerationType(m_type);
return false;
}
bool CompilerType::IsArrayType(CompilerType *element_type_ptr, uint64_t *size,
bool *is_incomplete) const {
if (IsValid())
return m_type_system->IsArrayType(m_type, element_type_ptr, size,
is_incomplete);
if (element_type_ptr)
element_type_ptr->Clear();
if (size)
*size = 0;
if (is_incomplete)
*is_incomplete = false;
return false;
}
bool CompilerType::IsVectorType(CompilerType *element_type,
uint64_t *size) const {
if (IsValid())
return m_type_system->IsVectorType(m_type, element_type, size);
return false;
}
bool CompilerType::IsRuntimeGeneratedType() const {
if (IsValid())
return m_type_system->IsRuntimeGeneratedType(m_type);
return false;
}
bool CompilerType::IsCharType() const {
if (IsValid())
return m_type_system->IsCharType(m_type);
return false;
}
bool CompilerType::IsCompleteType() const {
if (IsValid())
return m_type_system->IsCompleteType(m_type);
return false;
}
bool CompilerType::IsConst() const {
if (IsValid())
return m_type_system->IsConst(m_type);
return false;
}
bool CompilerType::IsCStringType(uint32_t &length) const {
if (IsValid())
return m_type_system->IsCStringType(m_type, length);
return false;
}
bool CompilerType::IsFunctionType() const {
if (IsValid())
return m_type_system->IsFunctionType(m_type);
return false;
}
// Used to detect "Homogeneous Floating-point Aggregates"
uint32_t
CompilerType::IsHomogeneousAggregate(CompilerType *base_type_ptr) const {
if (IsValid())
return m_type_system->IsHomogeneousAggregate(m_type, base_type_ptr);
return 0;
}
size_t CompilerType::GetNumberOfFunctionArguments() const {
if (IsValid())
return m_type_system->GetNumberOfFunctionArguments(m_type);
return 0;
}
CompilerType
CompilerType::GetFunctionArgumentAtIndex(const size_t index) const {
if (IsValid())
return m_type_system->GetFunctionArgumentAtIndex(m_type, index);
return CompilerType();
}
bool CompilerType::IsFunctionPointerType() const {
if (IsValid())
return m_type_system->IsFunctionPointerType(m_type);
return false;
}
bool CompilerType::IsBlockPointerType(
CompilerType *function_pointer_type_ptr) const {
if (IsValid())
return m_type_system->IsBlockPointerType(m_type, function_pointer_type_ptr);
return false;
}
bool CompilerType::IsIntegerType(bool &is_signed) const {
if (IsValid())
return m_type_system->IsIntegerType(m_type, is_signed);
return false;
}
bool CompilerType::IsEnumerationType(bool &is_signed) const {
if (IsValid())
return m_type_system->IsEnumerationType(m_type, is_signed);
return false;
}
bool CompilerType::IsIntegerOrEnumerationType(bool &is_signed) const {
return IsIntegerType(is_signed) || IsEnumerationType(is_signed);
}
bool CompilerType::IsPointerType(CompilerType *pointee_type) const {
if (IsValid()) {
return m_type_system->IsPointerType(m_type, pointee_type);
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool CompilerType::IsPointerOrReferenceType(CompilerType *pointee_type) const {
if (IsValid()) {
return m_type_system->IsPointerOrReferenceType(m_type, pointee_type);
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool CompilerType::IsReferenceType(CompilerType *pointee_type,
bool *is_rvalue) const {
if (IsValid()) {
return m_type_system->IsReferenceType(m_type, pointee_type, is_rvalue);
}
if (pointee_type)
pointee_type->Clear();
return false;
}
bool CompilerType::ShouldTreatScalarValueAsAddress() const {
if (IsValid())
return m_type_system->ShouldTreatScalarValueAsAddress(m_type);
return false;
}
bool CompilerType::IsFloatingPointType(uint32_t &count,
bool &is_complex) const {
if (IsValid()) {
return m_type_system->IsFloatingPointType(m_type, count, is_complex);
}
count = 0;
is_complex = false;
return false;
}
bool CompilerType::IsDefined() const {
if (IsValid())
return m_type_system->IsDefined(m_type);
return true;
}
bool CompilerType::IsPolymorphicClass() const {
if (IsValid()) {
return m_type_system->IsPolymorphicClass(m_type);
}
return false;
}
bool CompilerType::IsPossibleDynamicType(CompilerType *dynamic_pointee_type,
bool check_cplusplus,
bool check_objc) const {
if (IsValid())
return m_type_system->IsPossibleDynamicType(m_type, dynamic_pointee_type,
check_cplusplus, check_objc);
return false;
}
bool CompilerType::IsScalarType() const {
if (!IsValid())
return false;
return m_type_system->IsScalarType(m_type);
}
bool CompilerType::IsTypedefType() const {
if (!IsValid())
return false;
return m_type_system->IsTypedefType(m_type);
}
bool CompilerType::IsVoidType() const {
if (!IsValid())
return false;
return m_type_system->IsVoidType(m_type);
}
bool CompilerType::IsPointerToScalarType() const {
if (!IsValid())
return false;
return IsPointerType() && GetPointeeType().IsScalarType();
}
bool CompilerType::IsArrayOfScalarType() const {
CompilerType element_type;
if (IsArrayType(&element_type))
return element_type.IsScalarType();
return false;
}
bool CompilerType::IsBeingDefined() const {
if (!IsValid())
return false;
return m_type_system->IsBeingDefined(m_type);
}
// Type Completion
bool CompilerType::GetCompleteType() const {
if (!IsValid())
return false;
return m_type_system->GetCompleteType(m_type);
}
// AST related queries
size_t CompilerType::GetPointerByteSize() const {
if (m_type_system)
return m_type_system->GetPointerByteSize();
return 0;
}
ConstString CompilerType::GetTypeName() const {
if (IsValid()) {
return m_type_system->GetTypeName(m_type);
}
return ConstString("<invalid>");
}
ConstString CompilerType::GetDisplayTypeName() const {
if (IsValid())
return m_type_system->GetDisplayTypeName(m_type);
return ConstString("<invalid>");
}
uint32_t CompilerType::GetTypeInfo(
CompilerType *pointee_or_element_compiler_type) const {
if (!IsValid())
return 0;
return m_type_system->GetTypeInfo(m_type, pointee_or_element_compiler_type);
}
lldb::LanguageType CompilerType::GetMinimumLanguage() {
if (!IsValid())
return lldb::eLanguageTypeC;
return m_type_system->GetMinimumLanguage(m_type);
}
lldb::TypeClass CompilerType::GetTypeClass() const {
if (!IsValid())
return lldb::eTypeClassInvalid;
return m_type_system->GetTypeClass(m_type);
}
void CompilerType::SetCompilerType(TypeSystem *type_system,
lldb::opaque_compiler_type_t type) {
m_type_system = type_system;
m_type = type;
}
unsigned CompilerType::GetTypeQualifiers() const {
if (IsValid())
return m_type_system->GetTypeQualifiers(m_type);
return 0;
}
// Creating related types
CompilerType
CompilerType::GetArrayElementType(ExecutionContextScope *exe_scope) const {
if (IsValid()) {
return m_type_system->GetArrayElementType(m_type, exe_scope);
}
return CompilerType();
}
CompilerType CompilerType::GetArrayType(uint64_t size) const {
if (IsValid()) {
return m_type_system->GetArrayType(m_type, size);
}
return CompilerType();
}
CompilerType CompilerType::GetCanonicalType() const {
if (IsValid())
return m_type_system->GetCanonicalType(m_type);
return CompilerType();
}
CompilerType CompilerType::GetFullyUnqualifiedType() const {
if (IsValid())
return m_type_system->GetFullyUnqualifiedType(m_type);
return CompilerType();
}
CompilerType CompilerType::GetEnumerationIntegerType() const {
if (IsValid())
return m_type_system->GetEnumerationIntegerType(m_type);
return CompilerType();
}
int CompilerType::GetFunctionArgumentCount() const {
if (IsValid()) {
return m_type_system->GetFunctionArgumentCount(m_type);
}
return -1;
}
CompilerType CompilerType::GetFunctionArgumentTypeAtIndex(size_t idx) const {
if (IsValid()) {
return m_type_system->GetFunctionArgumentTypeAtIndex(m_type, idx);
}
return CompilerType();
}
CompilerType CompilerType::GetFunctionReturnType() const {
if (IsValid()) {
return m_type_system->GetFunctionReturnType(m_type);
}
return CompilerType();
}
size_t CompilerType::GetNumMemberFunctions() const {
if (IsValid()) {
return m_type_system->GetNumMemberFunctions(m_type);
}
return 0;
}
TypeMemberFunctionImpl CompilerType::GetMemberFunctionAtIndex(size_t idx) {
if (IsValid()) {
return m_type_system->GetMemberFunctionAtIndex(m_type, idx);
}
return TypeMemberFunctionImpl();
}
CompilerType CompilerType::GetNonReferenceType() const {
if (IsValid())
return m_type_system->GetNonReferenceType(m_type);
return CompilerType();
}
CompilerType CompilerType::GetPointeeType() const {
if (IsValid()) {
return m_type_system->GetPointeeType(m_type);
}
return CompilerType();
}
CompilerType CompilerType::GetPointerType() const {
if (IsValid()) {
return m_type_system->GetPointerType(m_type);
}
return CompilerType();
}
CompilerType CompilerType::GetLValueReferenceType() const {
if (IsValid())
return m_type_system->GetLValueReferenceType(m_type);
else
return CompilerType();
}
CompilerType CompilerType::GetRValueReferenceType() const {
if (IsValid())
return m_type_system->GetRValueReferenceType(m_type);
else
return CompilerType();
}
CompilerType CompilerType::GetAtomicType() const {
if (IsValid())
return m_type_system->GetAtomicType(m_type);
return CompilerType();
}
CompilerType CompilerType::AddConstModifier() const {
if (IsValid())
return m_type_system->AddConstModifier(m_type);
else
return CompilerType();
}
CompilerType CompilerType::AddVolatileModifier() const {
if (IsValid())
return m_type_system->AddVolatileModifier(m_type);
else
return CompilerType();
}
CompilerType CompilerType::AddRestrictModifier() const {
if (IsValid())
return m_type_system->AddRestrictModifier(m_type);
else
return CompilerType();
}
CompilerType CompilerType::CreateTypedef(const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t payload) const {
if (IsValid())
return m_type_system->CreateTypedef(m_type, name, decl_ctx, payload);
else
return CompilerType();
}
CompilerType CompilerType::GetTypedefedType() const {
if (IsValid())
return m_type_system->GetTypedefedType(m_type);
else
return CompilerType();
}
// Create related types using the current type's AST
CompilerType
CompilerType::GetBasicTypeFromAST(lldb::BasicType basic_type) const {
if (IsValid())
return m_type_system->GetBasicTypeFromAST(basic_type);
return CompilerType();
}
// Exploring the type
llvm::Optional<uint64_t>
CompilerType::GetBitSize(ExecutionContextScope *exe_scope) const {
if (IsValid())
return m_type_system->GetBitSize(m_type, exe_scope);
return {};
}
llvm::Optional<uint64_t>
CompilerType::GetByteSize(ExecutionContextScope *exe_scope) const {
if (llvm::Optional<uint64_t> bit_size = GetBitSize(exe_scope))
return (*bit_size + 7) / 8;
return {};
}
llvm::Optional<size_t> CompilerType::GetTypeBitAlign(ExecutionContextScope *exe_scope) const {
if (IsValid())
return m_type_system->GetTypeBitAlign(m_type, exe_scope);
return {};
}
lldb::Encoding CompilerType::GetEncoding(uint64_t &count) const {
if (!IsValid())
return lldb::eEncodingInvalid;
return m_type_system->GetEncoding(m_type, count);
}
lldb::Format CompilerType::GetFormat() const {
if (!IsValid())
return lldb::eFormatDefault;
return m_type_system->GetFormat(m_type);
}
uint32_t CompilerType::GetNumChildren(bool omit_empty_base_classes,
const ExecutionContext *exe_ctx) const {
if (!IsValid())
return 0;
return m_type_system->GetNumChildren(m_type, omit_empty_base_classes,
exe_ctx);
}
lldb::BasicType CompilerType::GetBasicTypeEnumeration() const {
if (IsValid())
return m_type_system->GetBasicTypeEnumeration(m_type);
return eBasicTypeInvalid;
}
void CompilerType::ForEachEnumerator(
std::function<bool(const CompilerType &integer_type,
ConstString name,
const llvm::APSInt &value)> const &callback) const {
if (IsValid())
return m_type_system->ForEachEnumerator(m_type, callback);
}
uint32_t CompilerType::GetNumFields() const {
if (!IsValid())
return 0;
return m_type_system->GetNumFields(m_type);
}
CompilerType CompilerType::GetFieldAtIndex(size_t idx, std::string &name,
uint64_t *bit_offset_ptr,
uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) const {
if (!IsValid())
return CompilerType();
return m_type_system->GetFieldAtIndex(m_type, idx, name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr);
}
uint32_t CompilerType::GetNumDirectBaseClasses() const {
if (IsValid())
return m_type_system->GetNumDirectBaseClasses(m_type);
return 0;
}
uint32_t CompilerType::GetNumVirtualBaseClasses() const {
if (IsValid())
return m_type_system->GetNumVirtualBaseClasses(m_type);
return 0;
}
CompilerType
CompilerType::GetDirectBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const {
if (IsValid())
return m_type_system->GetDirectBaseClassAtIndex(m_type, idx,
bit_offset_ptr);
return CompilerType();
}
CompilerType
CompilerType::GetVirtualBaseClassAtIndex(size_t idx,
uint32_t *bit_offset_ptr) const {
if (IsValid())
return m_type_system->GetVirtualBaseClassAtIndex(m_type, idx,
bit_offset_ptr);
return CompilerType();
}
uint32_t CompilerType::GetIndexOfFieldWithName(
const char *name, CompilerType *field_compiler_type_ptr,
uint64_t *bit_offset_ptr, uint32_t *bitfield_bit_size_ptr,
bool *is_bitfield_ptr) const {
unsigned count = GetNumFields();
std::string field_name;
for (unsigned index = 0; index < count; index++) {
CompilerType field_compiler_type(
GetFieldAtIndex(index, field_name, bit_offset_ptr,
bitfield_bit_size_ptr, is_bitfield_ptr));
if (strcmp(field_name.c_str(), name) == 0) {
if (field_compiler_type_ptr)
*field_compiler_type_ptr = field_compiler_type;
return index;
}
}
return UINT32_MAX;
}
CompilerType CompilerType::GetChildCompilerTypeAtIndex(
ExecutionContext *exe_ctx, size_t idx, bool transparent_pointers,
bool omit_empty_base_classes, bool ignore_array_bounds,
std::string &child_name, uint32_t &child_byte_size,
int32_t &child_byte_offset, uint32_t &child_bitfield_bit_size,
uint32_t &child_bitfield_bit_offset, bool &child_is_base_class,
bool &child_is_deref_of_parent, ValueObject *valobj,
uint64_t &language_flags) const {
if (!IsValid())
return CompilerType();
return m_type_system->GetChildCompilerTypeAtIndex(
m_type, exe_ctx, idx, transparent_pointers, omit_empty_base_classes,
ignore_array_bounds, child_name, child_byte_size, child_byte_offset,
child_bitfield_bit_size, child_bitfield_bit_offset, child_is_base_class,
child_is_deref_of_parent, valobj, language_flags);
}
// Look for a child member (doesn't include base classes, but it does include
// their members) in the type hierarchy. Returns an index path into
// "clang_type" describing how to reach the appropriate member.
//
// class A
// {
// public:
// int m_a;
// int m_b;
// };
//
// class B
// {
// };
//
// class C :
// public B,
// public A
// {
// };
//
// If we have a clang type that describes "class C", and we wanted to look up
// "m_b" in it:
//
// With omit_empty_base_classes == false we would get an integer array back
// with: { 1, 1 }. The first index 1 is the child index for "class A" within
// class C. The second index 1 is the child index for "m_b" within class A.
//
// With omit_empty_base_classes == true we would get an integer array back
// with: { 0, 1 }. The first index 0 is the child index for "class A" within
// class C (since class B doesn't have any members it doesn't count). The
// second index 1 is the child index for "m_b" within class A.
size_t CompilerType::GetIndexOfChildMemberWithName(
const char *name, bool omit_empty_base_classes,
std::vector<uint32_t> &child_indexes) const {
if (IsValid() && name && name[0]) {
return m_type_system->GetIndexOfChildMemberWithName(
m_type, name, omit_empty_base_classes, child_indexes);
}
return 0;
}
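// A minimal usage sketch reusing the "class C" example from the comment
// above (`c_type` is a hypothetical CompilerType for C, not part of this
// file):
//
//   std::vector<uint32_t> child_indexes;
//   c_type.GetIndexOfChildMemberWithName(
//       "m_b", /*omit_empty_base_classes=*/true, child_indexes);
//   // child_indexes == { 0, 1 }: step into "class A" (index 0, because the
//   // empty "class B" is skipped), then into "m_b" (index 1 within A).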
-size_t CompilerType::GetNumTemplateArguments() const {
+size_t CompilerType::GetNumTemplateArguments(bool expand_pack) const {
if (IsValid()) {
- return m_type_system->GetNumTemplateArguments(m_type);
+ return m_type_system->GetNumTemplateArguments(m_type, expand_pack);
}
return 0;
}
-TemplateArgumentKind CompilerType::GetTemplateArgumentKind(size_t idx) const {
+TemplateArgumentKind
+CompilerType::GetTemplateArgumentKind(size_t idx, bool expand_pack) const {
if (IsValid())
- return m_type_system->GetTemplateArgumentKind(m_type, idx);
+ return m_type_system->GetTemplateArgumentKind(m_type, idx, expand_pack);
return eTemplateArgumentKindNull;
}
-CompilerType CompilerType::GetTypeTemplateArgument(size_t idx) const {
+CompilerType CompilerType::GetTypeTemplateArgument(size_t idx,
+ bool expand_pack) const {
if (IsValid()) {
- return m_type_system->GetTypeTemplateArgument(m_type, idx);
+ return m_type_system->GetTypeTemplateArgument(m_type, idx, expand_pack);
}
return CompilerType();
}
llvm::Optional<CompilerType::IntegralTemplateArgument>
-CompilerType::GetIntegralTemplateArgument(size_t idx) const {
+CompilerType::GetIntegralTemplateArgument(size_t idx, bool expand_pack) const {
if (IsValid())
- return m_type_system->GetIntegralTemplateArgument(m_type, idx);
+ return m_type_system->GetIntegralTemplateArgument(m_type, idx, expand_pack);
return llvm::None;
}
CompilerType CompilerType::GetTypeForFormatters() const {
if (IsValid())
return m_type_system->GetTypeForFormatters(m_type);
return CompilerType();
}
LazyBool CompilerType::ShouldPrintAsOneLiner(ValueObject *valobj) const {
if (IsValid())
return m_type_system->ShouldPrintAsOneLiner(m_type, valobj);
return eLazyBoolCalculate;
}
bool CompilerType::IsMeaninglessWithoutDynamicResolution() const {
if (IsValid())
return m_type_system->IsMeaninglessWithoutDynamicResolution(m_type);
return false;
}
// Get the index of the child of "clang_type" whose name matches. This function
// doesn't descend into the children, but only looks one level deep; name
// matches can include base class names.
uint32_t
CompilerType::GetIndexOfChildWithName(const char *name,
bool omit_empty_base_classes) const {
if (IsValid() && name && name[0]) {
return m_type_system->GetIndexOfChildWithName(m_type, name,
omit_empty_base_classes);
}
return UINT32_MAX;
}
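// A hedged contrast with GetIndexOfChildMemberWithName above, assuming the
// same hypothetical `c_type` for "class C" and that base-class children are
// named after their type:
//
//   c_type.GetIndexOfChildWithName("A", true);   // index of the base class
//   c_type.GetIndexOfChildWithName("m_b", true); // UINT32_MAX: only one
//                                                // level deep, and "m_b"
//                                                // lives inside base A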
// Dumping types
void CompilerType::DumpValue(ExecutionContext *exe_ctx, Stream *s,
lldb::Format format, const DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size, uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset, bool show_types,
bool show_summary, bool verbose, uint32_t depth) {
if (!IsValid())
return;
m_type_system->DumpValue(m_type, exe_ctx, s, format, data, data_byte_offset,
data_byte_size, bitfield_bit_size,
bitfield_bit_offset, show_types, show_summary,
verbose, depth);
}
bool CompilerType::DumpTypeValue(Stream *s, lldb::Format format,
const DataExtractor &data,
lldb::offset_t byte_offset, size_t byte_size,
uint32_t bitfield_bit_size,
uint32_t bitfield_bit_offset,
ExecutionContextScope *exe_scope) {
if (!IsValid())
return false;
return m_type_system->DumpTypeValue(m_type, s, format, data, byte_offset,
byte_size, bitfield_bit_size,
bitfield_bit_offset, exe_scope);
}
void CompilerType::DumpSummary(ExecutionContext *exe_ctx, Stream *s,
const DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size) {
if (IsValid())
m_type_system->DumpSummary(m_type, exe_ctx, s, data, data_byte_offset,
data_byte_size);
}
void CompilerType::DumpTypeDescription(lldb::DescriptionLevel level) const {
if (IsValid())
m_type_system->DumpTypeDescription(m_type, level);
}
void CompilerType::DumpTypeDescription(Stream *s,
lldb::DescriptionLevel level) const {
if (IsValid()) {
m_type_system->DumpTypeDescription(m_type, s, level);
}
}
#ifndef NDEBUG
LLVM_DUMP_METHOD void CompilerType::dump() const {
if (IsValid())
m_type_system->dump(m_type);
else
llvm::errs() << "<invalid>\n";
}
#endif
bool CompilerType::GetValueAsScalar(const lldb_private::DataExtractor &data,
lldb::offset_t data_byte_offset,
size_t data_byte_size, Scalar &value,
ExecutionContextScope *exe_scope) const {
if (!IsValid())
return false;
if (IsAggregateType()) {
return false; // Aggregate types don't have scalar values
} else {
uint64_t count = 0;
lldb::Encoding encoding = GetEncoding(count);
if (encoding == lldb::eEncodingInvalid || count != 1)
return false;
llvm::Optional<uint64_t> byte_size = GetByteSize(exe_scope);
if (!byte_size)
return false;
lldb::offset_t offset = data_byte_offset;
switch (encoding) {
case lldb::eEncodingInvalid:
break;
case lldb::eEncodingVector:
break;
case lldb::eEncodingUint:
if (*byte_size <= sizeof(unsigned long long)) {
uint64_t uval64 = data.GetMaxU64(&offset, *byte_size);
if (*byte_size <= sizeof(unsigned int)) {
value = (unsigned int)uval64;
return true;
} else if (*byte_size <= sizeof(unsigned long)) {
value = (unsigned long)uval64;
return true;
} else if (*byte_size <= sizeof(unsigned long long)) {
value = (unsigned long long)uval64;
return true;
} else
value.Clear();
}
break;
case lldb::eEncodingSint:
if (*byte_size <= sizeof(long long)) {
int64_t sval64 = data.GetMaxS64(&offset, *byte_size);
if (*byte_size <= sizeof(int)) {
value = (int)sval64;
return true;
} else if (*byte_size <= sizeof(long)) {
value = (long)sval64;
return true;
} else if (*byte_size <= sizeof(long long)) {
value = (long long)sval64;
return true;
} else
value.Clear();
}
break;
case lldb::eEncodingIEEE754:
if (*byte_size <= sizeof(long double)) {
uint32_t u32;
uint64_t u64;
if (*byte_size == sizeof(float)) {
if (sizeof(float) == sizeof(uint32_t)) {
u32 = data.GetU32(&offset);
value = *((float *)&u32);
return true;
} else if (sizeof(float) == sizeof(uint64_t)) {
u64 = data.GetU64(&offset);
value = *((float *)&u64);
return true;
}
} else if (*byte_size == sizeof(double)) {
if (sizeof(double) == sizeof(uint32_t)) {
u32 = data.GetU32(&offset);
value = *((double *)&u32);
return true;
} else if (sizeof(double) == sizeof(uint64_t)) {
u64 = data.GetU64(&offset);
value = *((double *)&u64);
return true;
}
} else if (*byte_size == sizeof(long double)) {
if (sizeof(long double) == sizeof(uint32_t)) {
u32 = data.GetU32(&offset);
value = *((long double *)&u32);
return true;
} else if (sizeof(long double) == sizeof(uint64_t)) {
u64 = data.GetU64(&offset);
value = *((long double *)&u64);
return true;
}
}
}
break;
}
}
return false;
}
#ifndef NDEBUG
bool CompilerType::Verify() const {
return !IsValid() || m_type_system->Verify(m_type);
}
#endif
bool lldb_private::operator==(const lldb_private::CompilerType &lhs,
const lldb_private::CompilerType &rhs) {
return lhs.GetTypeSystem() == rhs.GetTypeSystem() &&
lhs.GetOpaqueQualType() == rhs.GetOpaqueQualType();
}
bool lldb_private::operator!=(const lldb_private::CompilerType &lhs,
const lldb_private::CompilerType &rhs) {
return !(lhs == rhs);
}
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 3092dc0bf0a4..412373533aab 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -1,300 +1,302 @@
//===-- TypeSystem.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "lldb/Symbol/TypeSystem.h"
#include "lldb/Core/PluginManager.h"
#include "lldb/Expression/UtilityFunction.h"
#include "lldb/Symbol/CompilerType.h"
#include "lldb/Target/Language.h"
#include <set>
using namespace lldb_private;
using namespace lldb;
/// A 64-bit SmallBitVector is only small up to 64-7 bits, and the
/// setBitsInMask interface wants to write full bytes.
static const size_t g_num_small_bitvector_bits = 64 - 8;
static_assert(eNumLanguageTypes < g_num_small_bitvector_bits,
"Languages bit vector is no longer small on 64 bit systems");
LanguageSet::LanguageSet() : bitvector(eNumLanguageTypes, false) {}
llvm::Optional<LanguageType> LanguageSet::GetSingularLanguage() {
if (bitvector.count() == 1)
return (LanguageType)bitvector.find_first();
return {};
}
void LanguageSet::Insert(LanguageType language) { bitvector.set(language); }
size_t LanguageSet::Size() const { return bitvector.count(); }
bool LanguageSet::Empty() const { return bitvector.none(); }
bool LanguageSet::operator[](unsigned i) const { return bitvector[i]; }
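// A minimal usage sketch for LanguageSet:
//
//   LanguageSet set;
//   set.Insert(lldb::eLanguageTypeC_plus_plus);
//   set.Insert(lldb::eLanguageTypeObjC);
//   // Two languages are set, so there is no singular language.
//   assert(set.Size() == 2 && !set.GetSingularLanguage());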
TypeSystem::~TypeSystem() = default;
static lldb::TypeSystemSP CreateInstanceHelper(lldb::LanguageType language,
Module *module, Target *target) {
uint32_t i = 0;
TypeSystemCreateInstance create_callback;
while ((create_callback = PluginManager::GetTypeSystemCreateCallbackAtIndex(
i++)) != nullptr) {
lldb::TypeSystemSP type_system_sp =
create_callback(language, module, target);
if (type_system_sp)
return type_system_sp;
}
return lldb::TypeSystemSP();
}
lldb::TypeSystemSP TypeSystem::CreateInstance(lldb::LanguageType language,
Module *module) {
return CreateInstanceHelper(language, module, nullptr);
}
lldb::TypeSystemSP TypeSystem::CreateInstance(lldb::LanguageType language,
Target *target) {
return CreateInstanceHelper(language, nullptr, target);
}
#ifndef NDEBUG
bool TypeSystem::Verify(lldb::opaque_compiler_type_t type) { return true; }
#endif
bool TypeSystem::IsAnonymousType(lldb::opaque_compiler_type_t type) {
return false;
}
CompilerType TypeSystem::GetArrayType(lldb::opaque_compiler_type_t type,
uint64_t size) {
return CompilerType();
}
CompilerType
TypeSystem::GetLValueReferenceType(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType
TypeSystem::GetRValueReferenceType(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType TypeSystem::GetAtomicType(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType TypeSystem::AddConstModifier(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType
TypeSystem::AddVolatileModifier(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType
TypeSystem::AddRestrictModifier(lldb::opaque_compiler_type_t type) {
return CompilerType();
}
CompilerType TypeSystem::CreateTypedef(lldb::opaque_compiler_type_t type,
const char *name,
const CompilerDeclContext &decl_ctx,
uint32_t opaque_payload) {
return CompilerType();
}
CompilerType TypeSystem::GetBuiltinTypeByName(ConstString name) {
return CompilerType();
}
CompilerType TypeSystem::GetTypeForFormatters(void *type) {
return CompilerType(this, type);
}
-size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) {
+size_t TypeSystem::GetNumTemplateArguments(lldb::opaque_compiler_type_t type,
+ bool expand_pack) {
return 0;
}
TemplateArgumentKind
-TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx) {
+TypeSystem::GetTemplateArgumentKind(opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) {
return eTemplateArgumentKindNull;
}
CompilerType TypeSystem::GetTypeTemplateArgument(opaque_compiler_type_t type,
- size_t idx) {
+ size_t idx, bool expand_pack) {
return CompilerType();
}
llvm::Optional<CompilerType::IntegralTemplateArgument>
-TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type,
- size_t idx) {
+TypeSystem::GetIntegralTemplateArgument(opaque_compiler_type_t type, size_t idx,
+ bool expand_pack) {
return llvm::None;
}
LazyBool TypeSystem::ShouldPrintAsOneLiner(void *type, ValueObject *valobj) {
return eLazyBoolCalculate;
}
bool TypeSystem::IsMeaninglessWithoutDynamicResolution(void *type) {
return false;
}
ConstString TypeSystem::DeclGetMangledName(void *opaque_decl) {
return ConstString();
}
CompilerDeclContext TypeSystem::DeclGetDeclContext(void *opaque_decl) {
return CompilerDeclContext();
}
CompilerType TypeSystem::DeclGetFunctionReturnType(void *opaque_decl) {
return CompilerType();
}
size_t TypeSystem::DeclGetFunctionNumArguments(void *opaque_decl) { return 0; }
CompilerType TypeSystem::DeclGetFunctionArgumentType(void *opaque_decl,
size_t arg_idx) {
return CompilerType();
}
std::vector<CompilerDecl>
TypeSystem::DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name,
bool ignore_imported_decls) {
return std::vector<CompilerDecl>();
}
std::unique_ptr<UtilityFunction>
TypeSystem::CreateUtilityFunction(std::string text, std::string name) {
return {};
}
#pragma mark TypeSystemMap
TypeSystemMap::TypeSystemMap() : m_mutex(), m_map() {}
TypeSystemMap::~TypeSystemMap() = default;
void TypeSystemMap::Clear() {
collection map;
{
std::lock_guard<std::mutex> guard(m_mutex);
map = m_map;
m_clear_in_progress = true;
}
std::set<TypeSystem *> visited;
for (auto pair : map) {
TypeSystem *type_system = pair.second.get();
if (type_system && !visited.count(type_system)) {
visited.insert(type_system);
type_system->Finalize();
}
}
map.clear();
{
std::lock_guard<std::mutex> guard(m_mutex);
m_map.clear();
m_clear_in_progress = false;
}
}
void TypeSystemMap::ForEach(std::function<bool(TypeSystem *)> const &callback) {
std::lock_guard<std::mutex> guard(m_mutex);
// Use a std::set so we only call the callback once for each unique
// TypeSystem instance
std::set<TypeSystem *> visited;
for (auto pair : m_map) {
TypeSystem *type_system = pair.second.get();
if (type_system && !visited.count(type_system)) {
visited.insert(type_system);
if (!callback(type_system))
break;
}
}
}
llvm::Expected<TypeSystem &> TypeSystemMap::GetTypeSystemForLanguage(
lldb::LanguageType language,
llvm::Optional<CreateCallback> create_callback) {
std::lock_guard<std::mutex> guard(m_mutex);
if (m_clear_in_progress)
return llvm::make_error<llvm::StringError>(
"Unable to get TypeSystem because TypeSystemMap is being cleared",
llvm::inconvertibleErrorCode());
collection::iterator pos = m_map.find(language);
if (pos != m_map.end()) {
auto *type_system = pos->second.get();
if (type_system)
return *type_system;
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}
for (const auto &pair : m_map) {
if (pair.second && pair.second->SupportsLanguage(language)) {
// Add a new mapping for "language" to point to an already existing
// TypeSystem that supports this language
m_map[language] = pair.second;
if (pair.second.get())
return *pair.second.get();
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}
}
if (!create_callback)
return llvm::make_error<llvm::StringError>(
"Unable to find type system for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)),
llvm::inconvertibleErrorCode());
// Cache even if we get a shared pointer that contains a null type system
// back
TypeSystemSP type_system_sp = (*create_callback)();
m_map[language] = type_system_sp;
if (type_system_sp.get())
return *type_system_sp.get();
return llvm::make_error<llvm::StringError>(
"TypeSystem for language " +
llvm::StringRef(Language::GetNameForLanguageType(language)) +
" doesn't exist",
llvm::inconvertibleErrorCode());
}
llvm::Expected<TypeSystem &>
TypeSystemMap::GetTypeSystemForLanguage(lldb::LanguageType language,
Module *module, bool can_create) {
if (can_create) {
return GetTypeSystemForLanguage(
language, llvm::Optional<CreateCallback>([language, module]() {
return TypeSystem::CreateInstance(language, module);
}));
}
return GetTypeSystemForLanguage(language);
}
llvm::Expected<TypeSystem &>
TypeSystemMap::GetTypeSystemForLanguage(lldb::LanguageType language,
Target *target, bool can_create) {
if (can_create) {
return GetTypeSystemForLanguage(
language, llvm::Optional<CreateCallback>([language, target]() {
return TypeSystem::CreateInstance(language, target);
}));
}
return GetTypeSystemForLanguage(language);
}
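// A minimal call-site sketch (the `map` and `module` variables here are
// hypothetical): request a C++ type system, allowing creation on demand.
//
//   llvm::Expected<TypeSystem &> ts_or_err = map.GetTypeSystemForLanguage(
//       lldb::eLanguageTypeC_plus_plus, module, /*can_create=*/true);
//   if (!ts_or_err)
//     llvm::consumeError(ts_or_err.takeError());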
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index c4795a80ead2..bc20f33f174c 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -1,1554 +1,1559 @@
//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines vectorizer utilities.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "vectorutils"
using namespace llvm;
using namespace llvm::PatternMatch;
/// Maximum factor for an interleaved memory access.
static cl::opt<unsigned> MaxInterleaveGroupFactor(
"max-interleave-group-factor", cl::Hidden,
cl::desc("Maximum factor for an interleaved access group (default = 8)"),
cl::init(8));
/// Return true if all of the intrinsic's arguments and return type are scalars
/// for the scalar form of the intrinsic, and vectors for the vector form of the
/// intrinsic (except operands that are marked as always being scalar by
/// isVectorIntrinsicWithScalarOpAtArg).
bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::abs: // Begin integer bit-manipulation.
case Intrinsic::bswap:
case Intrinsic::bitreverse:
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::fshl:
case Intrinsic::fshr:
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
case Intrinsic::sqrt: // Begin floating-point.
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::fabs:
case Intrinsic::minnum:
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
case Intrinsic::powi:
case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
return true;
default:
return false;
}
}
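// For example, llvm.sqrt is trivially vectorizable: the scalar form
//   declare float @llvm.sqrt.f32(float)
// maps element-wise onto the vector form
//   declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
// so isTriviallyVectorizable(Intrinsic::sqrt) returns true.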
/// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
unsigned ScalarOpdIdx) {
switch (ID) {
case Intrinsic::abs:
case Intrinsic::ctlz:
case Intrinsic::cttz:
case Intrinsic::powi:
return (ScalarOpdIdx == 1);
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
return (ScalarOpdIdx == 2);
default:
return false;
}
}
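// For example, the exponent operand of llvm.powi stays scalar when the call
// is vectorized:
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1); // true
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 0); // false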
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
unsigned OpdIdx) {
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
return OpdIdx == 0;
case Intrinsic::powi:
return OpdIdx == 1;
default:
return false;
}
}
/// Returns the intrinsic ID for a call.
/// For the input call instruction it finds the mapping intrinsic and returns
/// its ID; if it does not find one, it returns not_intrinsic.
Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
const TargetLibraryInfo *TLI) {
Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
if (ID == Intrinsic::not_intrinsic)
return Intrinsic::not_intrinsic;
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
ID == Intrinsic::experimental_noalias_scope_decl ||
ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
return ID;
return Intrinsic::not_intrinsic;
}
/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
const DataLayout &DL = Gep->getModule()->getDataLayout();
unsigned LastOperand = Gep->getNumOperands() - 1;
TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
// Walk backwards and try to peel off zeros.
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
std::advance(GEPTI, LastOperand - 2);
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
break;
--LastOperand;
}
return LastOperand;
}
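// A minimal illustration (IR names hypothetical): for
//   %gep = getelementptr float, ptr %p, i64 %i
// there are no trailing indices to peel, so the induction operand is
// operand 1 (%i). Trailing zero indices are only peeled off when the type
// they index has the same allocation size as the GEP's result element type.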
/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP)
return Ptr;
unsigned InductionOperand = getGEPInductionOperand(GEP);
// Check that all of the gep indices are uniform except for our induction
// operand.
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
if (i != InductionOperand &&
!SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
return Ptr;
return GEP->getOperand(InductionOperand);
}
/// If a value has only one user that is a CastInst, return it.
Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
CastInst *CI = dyn_cast<CastInst>(U);
if (CI && CI->getType() == Ty) {
if (!UniqueCast)
UniqueCast = CI;
else
return nullptr;
}
}
return UniqueCast;
}
/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
return nullptr;
// Try to remove a gep instruction to make the pointer (actually the index at
// this point) easier to analyze. If OrigPtr is equal to Ptr, we are analyzing
// the pointer; otherwise, we are analyzing the index.
Value *OrigPtr = Ptr;
// The size of the pointer access.
int64_t PtrAccessSize = 1;
Ptr = stripGetElementPtr(Ptr, SE, Lp);
const SCEV *V = SE->getSCEV(Ptr);
if (Ptr != OrigPtr)
// Strip off casts.
while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
V = C->getOperand();
const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
if (!S)
return nullptr;
V = S->getStepRecurrence(*SE);
if (!V)
return nullptr;
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
if (OrigPtr == Ptr) {
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
return nullptr;
const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
return nullptr;
int64_t StepVal = APStepVal.getSExtValue();
if (PtrAccessSize != StepVal)
return nullptr;
V = M->getOperand(1);
}
}
// Strip off casts.
Type *StripedOffRecurrenceCast = nullptr;
if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
StripedOffRecurrenceCast = C->getType();
V = C->getOperand();
}
// Look for the loop invariant symbolic value.
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
if (!U)
return nullptr;
Value *Stride = U->getValue();
if (!Lp->isLoopInvariant(Stride))
return nullptr;
// If we have stripped off the recurrence cast we have to make sure that we
// return the value that is used in this loop so that we can replace it later.
if (StripedOffRecurrenceCast)
Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
return Stride;
}
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
VectorType *VTy = cast<VectorType>(V->getType());
// For fixed-length vector, return undef for out of range access.
if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
unsigned Width = FVTy->getNumElements();
if (EltNo >= Width)
return UndefValue::get(FVTy->getElementType());
}
if (Constant *C = dyn_cast<Constant>(V))
return C->getAggregateElement(EltNo);
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
return nullptr;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
// If this is an insert to the element we are looking for, return the
// inserted value.
if (EltNo == IIElt)
return III->getOperand(1);
// Guard against infinite loop on malformed, unreachable IR.
if (III == III->getOperand(0))
return nullptr;
// Otherwise, the insertelement doesn't modify the value, recurse on its
// vector input.
return findScalarElement(III->getOperand(0), EltNo);
}
ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
// Restrict the following transformation to fixed-length vector.
if (SVI && isa<FixedVectorType>(SVI->getType())) {
unsigned LHSWidth =
cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
int InEl = SVI->getMaskValue(EltNo);
if (InEl < 0)
return UndefValue::get(VTy->getElementType());
if (InEl < (int)LHSWidth)
return findScalarElement(SVI->getOperand(0), InEl);
return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
}
// Extract a value from a vector add operation with a constant zero.
// TODO: Use getBinOpIdentity() to generalize this.
Value *Val; Constant *C;
if (match(V, m_Add(m_Value(Val), m_Constant(C))))
if (Constant *Elt = C->getAggregateElement(EltNo))
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
// If the vector is a splat then we can trivially find the scalar element.
if (isa<ScalableVectorType>(VTy))
if (Value *Splat = getSplatValue(V))
if (EltNo < VTy->getElementCount().getKnownMinValue())
return Splat;
// Otherwise, we don't know.
return nullptr;
}
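// A minimal sketch (IR names hypothetical): if %v was built as
//   %v = insertelement <4 x float> %w, float %s, i32 2
// then findScalarElement(%v, 2) returns %s directly, while
// findScalarElement(%v, 1) recurses into %w (and returns nullptr if %w is an
// opaque value such as a function argument).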
int llvm::getSplatIndex(ArrayRef<int> Mask) {
int SplatIndex = -1;
for (int M : Mask) {
// Ignore invalid (undefined) mask elements.
if (M < 0)
continue;
// There can be only 1 non-negative mask element value if this is a splat.
if (SplatIndex != -1 && SplatIndex != M)
return -1;
// Initialize the splat index to the 1st non-negative mask element.
SplatIndex = M;
}
assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
return SplatIndex;
}
/// Get splat value if the input is a splat vector or return nullptr.
/// This function is not fully general. It checks only 2 cases:
/// the input value is (1) a splat constant vector or (2) a sequence
/// of instructions that broadcasts a scalar at element 0.
Value *llvm::getSplatValue(const Value *V) {
if (isa<VectorType>(V->getType()))
if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue();
// shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
Value *Splat;
if (match(V,
m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
m_Value(), m_ZeroMask())))
return Splat;
return nullptr;
}
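// A sketch of the broadcast pattern matched above (IR names hypothetical):
//   %ins = insertelement <4 x i32> poison, i32 %x, i64 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison,
//                          <4 x i32> zeroinitializer
// getSplatValue(%splat) returns %x; for a constant splat such as
// <4 x i32> <i32 7, i32 7, i32 7, i32 7> it returns the i32 7 constant.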
bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
if (isa<VectorType>(V->getType())) {
if (isa<UndefValue>(V))
return true;
// FIXME: We can allow undefs, but if Index was specified, we may want to
// check that the constant is defined at that index.
if (auto *C = dyn_cast<Constant>(V))
return C->getSplatValue() != nullptr;
}
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
// FIXME: We can safely allow undefs here. If Index was specified, we will
// check that the mask elt is defined at the required index.
if (!is_splat(Shuf->getShuffleMask()))
return false;
// Match any index.
if (Index == -1)
return true;
// Match a specific element. The mask should be defined at and match the
// specified index.
return Shuf->getMaskValue(Index) == Index;
}
// The remaining tests are all recursive, so bail out if we hit the limit.
if (Depth++ == MaxAnalysisRecursionDepth)
return false;
// If both operands of a binop are splats, the result is a splat.
Value *X, *Y, *Z;
if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
// If all operands of a select are splats, the result is a splat.
if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
isSplatValue(Z, Index, Depth);
// TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
return false;
}
void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(Scale > 0 && "Unexpected scaling factor");
// Fast-path: if no scaling, then it is just a copy.
if (Scale == 1) {
ScaledMask.assign(Mask.begin(), Mask.end());
return;
}
ScaledMask.clear();
for (int MaskElt : Mask) {
if (MaskElt >= 0) {
assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
"Overflowed 32-bits");
}
for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
}
}
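// A worked example: narrowing mask <0, 2> by Scale == 2 expands each wide
// element into two consecutive narrow elements:
//
//   SmallVector<int> Scaled;
//   narrowShuffleMaskElts(2, {0, 2}, Scaled); // Scaled == <0, 1, 4, 5>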
bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(Scale > 0 && "Unexpected scaling factor");
// Fast-path: if no scaling, then it is just a copy.
if (Scale == 1) {
ScaledMask.assign(Mask.begin(), Mask.end());
return true;
}
// We must map the original elements down evenly to a type with fewer elements.
int NumElts = Mask.size();
if (NumElts % Scale != 0)
return false;
ScaledMask.clear();
ScaledMask.reserve(NumElts / Scale);
// Step through the input mask by splitting into Scale-sized slices.
do {
ArrayRef<int> MaskSlice = Mask.take_front(Scale);
assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
// The first element of the slice determines how we evaluate this slice.
int SliceFront = MaskSlice.front();
if (SliceFront < 0) {
// Negative values (undef or other "sentinel" values) must be equal across
// the entire slice.
if (!is_splat(MaskSlice))
return false;
ScaledMask.push_back(SliceFront);
} else {
// A positive mask element must be cleanly divisible.
if (SliceFront % Scale != 0)
return false;
// Elements of the slice must be consecutive.
for (int i = 1; i < Scale; ++i)
if (MaskSlice[i] != SliceFront + i)
return false;
ScaledMask.push_back(SliceFront / Scale);
}
Mask = Mask.drop_front(Scale);
} while (!Mask.empty());
assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
// All elements of the original mask can be scaled down to map to the elements
// of a mask with wider elements.
return true;
}
void llvm::processShuffleMasks(
ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
// Try to perform better estimation of the permutation.
// 1. Split the source/destination vectors into real registers.
// 2. Do the mask analysis to identify which real registers are
// permuted.
int Sz = Mask.size();
unsigned SzDest = Sz / NumOfDestRegs;
unsigned SzSrc = Sz / NumOfSrcRegs;
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
auto &RegMasks = Res[I];
RegMasks.assign(NumOfSrcRegs, {});
// Record, for each element of this dest register, which source register it
// comes from.
for (unsigned K = 0; K < SzDest; ++K) {
int Idx = I * SzDest + K;
if (Idx == Sz)
break;
if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
continue;
int SrcRegIdx = Mask[Idx] / SzSrc;
// Add a cost of PermuteTwoSrc for each new source register permute,
// if we have more than one source register.
if (RegMasks[SrcRegIdx].empty())
RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
}
}
// Process split mask.
for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
auto &Dest = Res[I];
int NumSrcRegs =
count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
switch (NumSrcRegs) {
case 0:
// No input vectors were used!
NoInputAction();
break;
case 1: {
// Find the only non-empty mask (there is exactly one source register).
auto *It =
find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
unsigned SrcReg = std::distance(Dest.begin(), It);
SingleInputAction(*It, SrcReg, I);
break;
}
default: {
// The first mask is a permutation of a single register. Since we have two or
// more input registers to shuffle, we merge the masks for the first two
// registers and generate a shuffle of two registers rather than reordering
// the first register and then shuffling with the second one. Next, we
// generate the shuffles of the resulting register plus the remaining
// registers from the list.
auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
ArrayRef<int> SecondMask) {
for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
if (SecondMask[Idx] != UndefMaskElem) {
assert(FirstMask[Idx] == UndefMaskElem &&
"Expected undefined mask element.");
FirstMask[Idx] = SecondMask[Idx] + VF;
}
}
};
auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
if (Mask[Idx] != UndefMaskElem)
Mask[Idx] = Idx;
}
};
int SecondIdx;
do {
int FirstIdx = -1;
SecondIdx = -1;
MutableArrayRef<int> FirstMask, SecondMask;
for (unsigned I = 0; I < NumOfDestRegs; ++I) {
SmallVectorImpl<int> &RegMask = Dest[I];
if (RegMask.empty())
continue;
if (FirstIdx == SecondIdx) {
FirstIdx = I;
FirstMask = RegMask;
continue;
}
SecondIdx = I;
SecondMask = RegMask;
CombineMasks(FirstMask, SecondMask);
ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
NormalizeMask(FirstMask);
RegMask.clear();
SecondMask = FirstMask;
SecondIdx = FirstIdx;
}
if (FirstIdx != SecondIdx && SecondIdx >= 0) {
CombineMasks(SecondMask, FirstMask);
ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
Dest[FirstIdx].clear();
NormalizeMask(SecondMask);
}
} while (SecondIdx >= 0);
break;
}
}
}
}
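// For example, if a chain of i64 instructions is only ever truncated to i8
// (so only the low 8 bits are demanded anywhere in the chain), the
// instructions in that equivalence class are typically mapped to a minimum
// width of 8.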
MapVector<Instruction *, uint64_t>
llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
const TargetTransformInfo *TTI) {
// DemandedBits will give us every value's live-out bits. But we want
// to ensure no extra casts would need to be inserted, so every DAG
// of connected values must have the same minimum bitwidth.
EquivalenceClasses<Value *> ECs;
SmallVector<Value *, 16> Worklist;
SmallPtrSet<Value *, 4> Roots;
SmallPtrSet<Value *, 16> Visited;
DenseMap<Value *, uint64_t> DBits;
SmallPtrSet<Instruction *, 4> InstructionSet;
MapVector<Instruction *, uint64_t> MinBWs;
// Determine the roots. We work bottom-up, from truncs or icmps.
bool SeenExtFromIllegalType = false;
for (auto *BB : Blocks)
for (auto &I : *BB) {
InstructionSet.insert(&I);
if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
!TTI->isTypeLegal(I.getOperand(0)->getType()))
SeenExtFromIllegalType = true;
// Only deal with non-vector integers up to 64-bits wide.
if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
!I.getType()->isVectorTy() &&
I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
// Don't make work for ourselves. If we know the loaded type is legal,
// don't add it to the worklist.
if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
continue;
Worklist.push_back(&I);
Roots.insert(&I);
}
}
// Early exit.
if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
return MinBWs;
// Now proceed breadth-first, unioning values together.
while (!Worklist.empty()) {
Value *Val = Worklist.pop_back_val();
Value *Leader = ECs.getOrInsertLeaderValue(Val);
if (!Visited.insert(Val).second)
continue;
// Non-instructions terminate a chain successfully.
if (!isa<Instruction>(Val))
continue;
Instruction *I = cast<Instruction>(Val);
// If we encounter a type that is larger than 64 bits, we can't represent
// it so bail out.
if (DB.getDemandedBits(I).getBitWidth() > 64)
return MapVector<Instruction *, uint64_t>();
uint64_t V = DB.getDemandedBits(I).getZExtValue();
DBits[Leader] |= V;
DBits[I] = V;
// Casts, loads and instructions outside of our range terminate a chain
// successfully.
if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
!InstructionSet.count(I))
continue;
// Unsafe casts terminate a chain unsuccessfully. We can't do anything
// useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
// transform anything that relies on them.
if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
!I->getType()->isIntegerTy()) {
DBits[Leader] |= ~0ULL;
continue;
}
// We don't modify the types of PHIs. Reductions will already have been
// truncated if possible, and inductions' sizes will have been chosen by
// indvars.
if (isa<PHINode>(I))
continue;
if (DBits[Leader] == ~0ULL)
// All bits demanded, no point continuing.
continue;
for (Value *O : cast<User>(I)->operands()) {
ECs.unionSets(Leader, O);
Worklist.push_back(O);
}
}
// Now we've discovered all values, walk them to see if there are
// any users we didn't see. If there are, we can't optimize that
// chain.
for (auto &I : DBits)
for (auto *U : I.first->users())
if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
uint64_t LeaderDemandedBits = 0;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
LeaderDemandedBits |= DBits[M];
uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
llvm::countLeadingZeros(LeaderDemandedBits);
// Round up to a power of 2
if (!isPowerOf2_64((uint64_t)MinBW))
MinBW = NextPowerOf2(MinBW);
// We don't modify the types of PHIs. Reductions will already have been
// truncated if possible, and inductions' sizes will have been chosen by
// indvars.
// If we are required to shrink a PHI, abandon this entire equivalence class.
bool Abort = false;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
Abort = true;
break;
}
if (Abort)
continue;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
if (!isa<Instruction>(M))
continue;
Type *Ty = M->getType();
if (Roots.count(M))
Ty = cast<Instruction>(M)->getOperand(0)->getType();
if (MinBW < Ty->getScalarSizeInBits())
MinBWs[cast<Instruction>(M)] = MinBW;
}
}
return MinBWs;
}
/// Add all access groups in @p AccGroups to @p List.
template <typename ListT>
static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
// Interpret an access group as a list containing itself.
if (AccGroups->getNumOperands() == 0) {
assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
List.insert(AccGroups);
return;
}
for (const auto &AccGroupListOp : AccGroups->operands()) {
auto *Item = cast<MDNode>(AccGroupListOp.get());
assert(isValidAsAccessGroup(Item) && "List item must be an access group");
List.insert(Item);
}
}
MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
if (!AccGroups1)
return AccGroups2;
if (!AccGroups2)
return AccGroups1;
if (AccGroups1 == AccGroups2)
return AccGroups1;
SmallSetVector<Metadata *, 4> Union;
addToAccessGroupList(Union, AccGroups1);
addToAccessGroupList(Union, AccGroups2);
if (Union.size() == 0)
return nullptr;
if (Union.size() == 1)
return cast<MDNode>(Union.front());
LLVMContext &Ctx = AccGroups1->getContext();
return MDNode::get(Ctx, Union.getArrayRef());
}
MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
const Instruction *Inst2) {
bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
if (!MayAccessMem1 && !MayAccessMem2)
return nullptr;
if (!MayAccessMem1)
return Inst2->getMetadata(LLVMContext::MD_access_group);
if (!MayAccessMem2)
return Inst1->getMetadata(LLVMContext::MD_access_group);
MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
if (!MD1 || !MD2)
return nullptr;
if (MD1 == MD2)
return MD1;
// Use set for scalable 'contains' check.
SmallPtrSet<Metadata *, 4> AccGroupSet2;
addToAccessGroupList(AccGroupSet2, MD2);
SmallVector<Metadata *, 4> Intersection;
if (MD1->getNumOperands() == 0) {
assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
if (AccGroupSet2.count(MD1))
Intersection.push_back(MD1);
} else {
for (const MDOperand &Node : MD1->operands()) {
auto *Item = cast<MDNode>(Node.get());
assert(isValidAsAccessGroup(Item) && "List item must be an access group");
if (AccGroupSet2.count(Item))
Intersection.push_back(Item);
}
}
if (Intersection.size() == 0)
return nullptr;
if (Intersection.size() == 1)
return cast<MDNode>(Intersection.front());
LLVMContext &Ctx = Inst1->getContext();
return MDNode::get(Ctx, Intersection);
}
/// \returns \p I after propagating metadata from \p VL.
Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
if (VL.empty())
return Inst;
Instruction *I0 = cast<Instruction>(VL[0]);
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
LLVMContext::MD_access_group}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
const Instruction *IJ = cast<Instruction>(VL[J]);
MDNode *IMD = IJ->getMetadata(Kind);
switch (Kind) {
case LLVMContext::MD_tbaa:
MD = MDNode::getMostGenericTBAA(MD, IMD);
break;
case LLVMContext::MD_alias_scope:
MD = MDNode::getMostGenericAliasScope(MD, IMD);
break;
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
case LLVMContext::MD_access_group:
MD = intersectAccessGroups(Inst, IJ);
break;
default:
llvm_unreachable("unhandled metadata");
}
}
Inst->setMetadata(Kind, MD);
}
return Inst;
}
Constant *
llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
const InterleaveGroup<Instruction> &Group) {
// All 1's means mask is not needed.
if (Group.getNumMembers() == Group.getFactor())
return nullptr;
// TODO: support reversed access.
assert(!Group.isReverse() && "Reversed group not supported.");
SmallVector<Constant *, 16> Mask;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < Group.getFactor(); ++j) {
unsigned HasMember = Group.getMember(j) ? 1 : 0;
Mask.push_back(Builder.getInt1(HasMember));
}
return ConstantVector::get(Mask);
}
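// For example, createReplicatedMask(3, 2) produces <0, 0, 0, 1, 1, 1>.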
llvm::SmallVector<int, 16>
llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < ReplicationFactor; j++)
MaskVec.push_back(i);
return MaskVec;
}
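// For example, createInterleaveMask(4, 2) produces <0, 4, 1, 5, 2, 6, 3, 7>,
// interleaving the lanes of two 4-element vectors.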
llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
unsigned NumVecs) {
SmallVector<int, 16> Mask;
for (unsigned i = 0; i < VF; i++)
for (unsigned j = 0; j < NumVecs; j++)
Mask.push_back(j * VF + i);
return Mask;
}
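// For example, createStrideMask(0, 2, 4) produces <0, 2, 4, 6>, selecting
// every other element starting at index 0.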
llvm::SmallVector<int, 16>
llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
SmallVector<int, 16> Mask;
for (unsigned i = 0; i < VF; i++)
Mask.push_back(Start + i * Stride);
return Mask;
}
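// For example, createSequentialMask(0, 4, 2) produces <0, 1, 2, 3, -1, -1>,
// where the trailing -1s act as undef padding.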
llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
unsigned NumInts,
unsigned NumUndefs) {
SmallVector<int, 16> Mask;
for (unsigned i = 0; i < NumInts; i++)
Mask.push_back(Start + i);
for (unsigned i = 0; i < NumUndefs; i++)
Mask.push_back(-1);
return Mask;
}
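// For example, with NumElts == 4 the two-operand mask <0, 5, 2, 7> becomes
// the single-operand mask <0, 1, 2, 3>.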
llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
unsigned NumElts) {
// Avoid casts in the loop and make sure we have a reasonable number.
int NumEltsSigned = NumElts;
assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
// If the mask chooses an element from operand 1, reduce it to choose from the
// corresponding element of operand 0. Undef mask elements are unchanged.
SmallVector<int, 16> UnaryMask;
for (int MaskElt : Mask) {
assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
UnaryMask.push_back(UnaryElt);
}
return UnaryMask;
}
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
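/// For example, concatenating a <2 x float> with a <1 x float> first pads the
/// second operand with one undef element and then produces a <3 x float>
/// holding V1[0], V1[1], V2[0].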
static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
Value *V2) {
VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
assert(VecTy1 && VecTy2 &&
VecTy1->getScalarType() == VecTy2->getScalarType() &&
"Expect two vectors with the same element type");
unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
assert(NumElts1 >= NumElts2 && "Unexpected: the first vector has fewer elements");
if (NumElts1 > NumElts2) {
// Extend with UNDEFs.
V2 = Builder.CreateShuffleVector(
V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
}
return Builder.CreateShuffleVector(
V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
}
Value *llvm::concatenateVectors(IRBuilderBase &Builder,
ArrayRef<Value *> Vecs) {
unsigned NumVecs = Vecs.size();
assert(NumVecs > 1 && "Should be at least two vectors");
SmallVector<Value *, 8> ResList;
ResList.append(Vecs.begin(), Vecs.end());
do {
SmallVector<Value *, 8> TmpList;
for (unsigned i = 0; i < NumVecs - 1; i += 2) {
Value *V0 = ResList[i], *V1 = ResList[i + 1];
assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
"Only the last vector may have a different type");
TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
}
// Push the last vector if the total number of vectors is odd.
if (NumVecs % 2 != 0)
TmpList.push_back(ResList[NumVecs - 1]);
ResList = TmpList;
NumVecs = ResList.size();
} while (NumVecs > 1);
return ResList[0];
}
bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
assert(isa<VectorType>(Mask->getType()) &&
isa<IntegerType>(Mask->getType()->getScalarType()) &&
cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1 &&
"Mask must be a vector of i1");
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
return false;
if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
return true;
if (isa<ScalableVectorType>(ConstMask->getType()))
return false;
for (unsigned
I = 0,
E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
continue;
return false;
}
return true;
}
bool llvm::maskIsAllOneOrUndef(Value *Mask) {
assert(isa<VectorType>(Mask->getType()) &&
isa<IntegerType>(Mask->getType()->getScalarType()) &&
cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1 &&
"Mask must be a vector of i1");
auto *ConstMask = dyn_cast<Constant>(Mask);
if (!ConstMask)
return false;
if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
return true;
if (isa<ScalableVectorType>(ConstMask->getType()))
return false;
for (unsigned
I = 0,
E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
I != E; ++I) {
if (auto *MaskElt = ConstMask->getAggregateElement(I))
if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
continue;
return false;
}
return true;
}
/// TODO: This is a lot like known bits, but for
/// vectors. Is there something we can common this with?
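/// For example, the constant mask <i1 1, i1 0, i1 1, i1 1> yields the
/// demanded-elements bitmask 0b1101 (element 1 is known not to be demanded).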
APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
assert(isa<FixedVectorType>(Mask->getType()) &&
isa<IntegerType>(Mask->getType()->getScalarType()) &&
cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1 &&
"Mask must be a fixed width vector of i1");
const unsigned VWidth =
cast<FixedVectorType>(Mask->getType())->getNumElements();
APInt DemandedElts = APInt::getAllOnes(VWidth);
if (auto *CV = dyn_cast<ConstantVector>(Mask))
for (unsigned i = 0; i < VWidth; i++)
if (CV->getAggregateElement(i)->isNullValue())
DemandedElts.clearBit(i);
return DemandedElts;
}
bool InterleavedAccessInfo::isStrided(int Stride) {
unsigned Factor = std::abs(Stride);
return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
}
void InterleavedAccessInfo::collectConstStrideAccesses(
MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
const ValueToValueMap &Strides) {
auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
// Since it's desired that the load/store instructions be maintained in
// "program order" for the interleaved access analysis, we have to visit the
// blocks in the loop in reverse postorder (i.e., in a topological order).
// Such an ordering will ensure that any load/store that may be executed
// before a second load/store will precede the second load/store in
// AccessStrideInfo.
LoopBlocksDFS DFS(TheLoop);
DFS.perform(LI);
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
for (auto &I : *BB) {
Value *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
continue;
Type *ElementTy = getLoadStoreType(&I);
+ // Currently, codegen doesn't support cases where the type size doesn't
+ // match the alloc size. Skip them for now.
+ uint64_t Size = DL.getTypeAllocSize(ElementTy);
+ if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
+ continue;
+
// We don't check wrapping here because we don't know yet if Ptr will be
// part of a full group or a group with gaps. Checking wrapping for all
// pointers (even those that end up in groups with no gaps) will be overly
// conservative. For full groups, wrapping should be ok since if we would
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
- uint64_t Size = DL.getTypeAllocSize(ElementTy);
AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
getLoadStoreAlignment(&I));
}
}
// Analyze interleaved accesses and collect them into interleaved load and
// store groups.
//
// When generating code for an interleaved load group, we effectively hoist all
// loads in the group to the location of the first load in program order. When
// generating code for an interleaved store group, we sink all stores to the
// location of the last store. This code motion can change the order of load
// and store instructions and may break dependences.
//
// The code generation strategy mentioned above ensures that we won't violate
// any write-after-read (WAR) dependences.
//
// E.g., for the WAR dependence: a = A[i]; // (1)
// A[i] = b; // (2)
//
// The store group of (2) is always inserted at or below (2), and the load
// group of (1) is always inserted at or above (1). Thus, the instructions will
// never be reordered. All other dependences are checked to ensure the
// correctness of the instruction reordering.
//
// The algorithm visits all memory accesses in the loop in bottom-up program
// order. Program order is established by traversing the blocks in the loop in
// reverse postorder when collecting the accesses.
//
// We visit the memory accesses in bottom-up order because it can simplify the
// construction of store groups in the presence of write-after-write (WAW)
// dependences.
//
// E.g., for the WAW dependence: A[i] = a; // (1)
// A[i] = b; // (2)
// A[i + 1] = c; // (3)
//
// We will first create a store group with (3) and (2). (1) can't be added to
// this group because it and (2) are dependent. However, (1) can be grouped
// with other accesses that may precede it in program order. Note that a
// bottom-up order does not imply that WAW dependences should not be checked.
void InterleavedAccessInfo::analyzeInterleaving(
bool EnablePredicatedInterleavedMemAccesses) {
LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
const ValueToValueMap &Strides = LAI->getSymbolicStrides();
// Holds all accesses with a constant stride.
MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
collectConstStrideAccesses(AccessStrideInfo, Strides);
if (AccessStrideInfo.empty())
return;
// Collect the dependences in the loop.
collectDependences();
// Holds all interleaved store groups temporarily.
SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
// Holds all interleaved load groups temporarily.
SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
// Search in bottom-up program order for pairs of accesses (A and B) that can
// form interleaved load or store groups. In the algorithm below, access A
// precedes access B in program order. We initialize a group for B in the
// outer loop of the algorithm, and then in the inner loop, we attempt to
// insert each A into B's group if:
//
// 1. A and B have the same stride,
// 2. A and B have the same memory object size, and
// 3. A belongs in B's group according to its distance from B.
//
// Special care is taken to ensure group formation will not break any
// dependences.
for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
BI != E; ++BI) {
Instruction *B = BI->first;
StrideDescriptor DesB = BI->second;
// Initialize a group for B if it has an allowable stride. Even if we don't
// create a group for B, we continue with the bottom-up algorithm to ensure
// we don't break any of B's dependences.
InterleaveGroup<Instruction> *Group = nullptr;
if (isStrided(DesB.Stride) &&
(!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
Group = getInterleaveGroup(B);
if (!Group) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
<< '\n');
Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
}
if (B->mayWriteToMemory())
StoreGroups.insert(Group);
else
LoadGroups.insert(Group);
}
for (auto AI = std::next(BI); AI != E; ++AI) {
Instruction *A = AI->first;
StrideDescriptor DesA = AI->second;
// Our code motion strategy implies that we can't have dependences
// between accesses in an interleaved group and other accesses located
// between the first and last member of the group. Note that this also
// means that a group can't have more than one member at a given offset.
// The accesses in a group can have dependences with other accesses, but
// we must ensure we don't extend the boundaries of the group such that
// we encompass those dependent accesses.
//
// For example, assume we have the sequence of accesses shown below in a
// stride-2 loop:
//
// (1, 2) is a group | A[i] = a; // (1)
// | A[i-1] = b; // (2) |
// A[i-3] = c; // (3)
// A[i] = d; // (4) | (2, 4) is not a group
//
// Because accesses (2) and (3) are dependent, we can group (2) with (1)
// but not with (4). If we did, the dependent access (3) would be within
// the boundaries of the (2, 4) group.
if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
// If a dependence exists and A is already in a group, we know that A
// must be a store since A precedes B and WAR dependences are allowed.
// Thus, A would be sunk below B. We release A's group to prevent this
// illegal code motion. A will then be free to form another group with
// instructions that precede it.
if (isInterleaved(A)) {
InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
"dependence between " << *A << " and "<< *B << '\n');
StoreGroups.remove(StoreGroup);
releaseGroup(StoreGroup);
}
// If a dependence exists and A is not already in a group (or it was
// and we just released it), B might be hoisted above A (if B is a
// load) or another store might be sunk below A (if B is a store). In
// either case, we can't add additional instructions to B's group. B
// will only form a group with instructions that it precedes.
break;
}
// At this point, we've checked for illegal code motion. If either A or B
// isn't strided, there's nothing left to do.
if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
continue;
// Ignore A if it's already in a group or isn't the same kind of memory
// operation as B.
// Note that mayReadFromMemory() isn't mutually exclusive with
// mayWriteToMemory() in the case of atomic loads. We shouldn't see those
// here; canVectorizeMemory() should have returned false - unless we were
// asked for optimization remarks.
if (isInterleaved(A) ||
(A->mayReadFromMemory() != B->mayReadFromMemory()) ||
(A->mayWriteToMemory() != B->mayWriteToMemory()))
continue;
// Check rules 1 and 2. Ignore A if its stride or size is different from
// that of B.
if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
continue;
// Ignore A if the memory objects of A and B don't belong to the same
// address space.
if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
continue;
// Calculate the distance from A to B.
const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
if (!DistToB)
continue;
int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
// Check rule 3. Ignore A if its distance to B is not a multiple of the
// size.
if (DistanceToB % static_cast<int64_t>(DesB.Size))
continue;
// All members of a predicated interleave-group must have the same predicate,
// and currently must reside in the same BB.
BasicBlock *BlockA = A->getParent();
BasicBlock *BlockB = B->getParent();
if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
(!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
continue;
// The index of A is the index of B plus A's distance to B in multiples
// of the size.
int IndexA =
Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
// Try to insert A into B's group.
if (Group->insertMember(A, IndexA, DesA.Alignment)) {
LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
<< " into the interleave group with" << *B
<< '\n');
InterleaveGroupMap[A] = Group;
// Set the first load in program order as the insert position.
if (A->mayReadFromMemory())
Group->setInsertPos(A);
}
} // Iteration over A accesses.
} // Iteration over B accesses.
auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
int Index,
std::string FirstOrLast) -> bool {
Instruction *Member = Group->getMember(Index);
assert(Member && "Group member does not exist");
Value *MemberPtr = getLoadStorePointerOperand(Member);
Type *AccessTy = getLoadStoreType(Member);
if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
/*Assume=*/false, /*ShouldCheckWrap=*/true))
return false;
LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
<< FirstOrLast
<< " group member potentially pointer-wrapping.\n");
releaseGroup(Group);
return true;
};
// Remove interleaved groups with gaps whose memory
// accesses may wrap around. We have to revisit the getPtrStride analysis,
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
// not check wrapping (see documentation there).
// FORNOW we use Assume=false;
// TODO: Change to Assume=true but making sure we don't exceed the threshold
// of runtime SCEV assumptions checks (thereby potentially failing to
// vectorize altogether).
// Additional optional optimizations:
// TODO: If we are peeling the loop and we know that the first pointer doesn't
// wrap then we can deduce that all pointers in the group don't wrap.
// This means that we can forcefully peel the loop in order to only have to
// check the first pointer for no-wrap. Once we change to Assume=true,
// we'll only need at most one runtime check per interleaved group.
for (auto *Group : LoadGroups) {
// Case 1: A full group. We can skip the checks; for full groups, if the wide
// load would wrap around the address space we would do a memory access at
// nullptr even without the transformation.
if (Group->getNumMembers() == Group->getFactor())
continue;
// Case 2: If the first and last members of the group don't wrap, this implies
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
// and group member Factor - 1; if the latter doesn't exist we rely on
// peeling (if it is a non-reversed access -- see Case 3).
if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
continue;
if (Group->getMember(Group->getFactor() - 1))
InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
std::string("last"));
else {
// Case 3: A non-reversed interleaved load group with gaps: We need
// to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
// to look for a member at index factor - 1, since every group must have
// a member at index zero.
if (Group->isReverse()) {
LLVM_DEBUG(
dbgs() << "LV: Invalidate candidate interleaved group due to "
"a reverse access with gaps.\n");
releaseGroup(Group);
continue;
}
LLVM_DEBUG(
dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
RequiresScalarEpilogue = true;
}
}
for (auto *Group : StoreGroups) {
// Case 1: A full group. We can skip the checks; for full groups, if the wide
// store would wrap around the address space we would do a memory access at
// nullptr even without the transformation.
if (Group->getNumMembers() == Group->getFactor())
continue;
// Interleave-store-group with gaps is implemented using masked wide store.
// Remove interleaved store groups with gaps if
// masked-interleaved-accesses are not enabled by the target.
if (!EnablePredicatedInterleavedMemAccesses) {
LLVM_DEBUG(
dbgs() << "LV: Invalidate candidate interleaved store group due "
"to gaps.\n");
releaseGroup(Group);
continue;
}
// Case 2: If the first and last members of the group don't wrap, this implies
// that all the pointers in the group don't wrap.
// So we check only group member 0 (which is always guaranteed to exist),
// and the last group member. Case 3 (scalar epilog) is not relevant for
// stores with gaps, which are implemented with masked-store (rather than
// speculative access, as in loads).
if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
continue;
for (int Index = Group->getFactor() - 1; Index > 0; Index--)
if (Group->getMember(Index)) {
InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
break;
}
}
}
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
// If no group had triggered the requirement to create an epilogue loop,
// there is nothing to do.
if (!requiresScalarEpilogue())
return;
bool ReleasedGroup = false;
// Release groups requiring scalar epilogues. Note that this also removes them
// from InterleaveGroups.
for (auto *Group : make_early_inc_range(InterleaveGroups)) {
if (!Group->requiresScalarEpilogue())
continue;
LLVM_DEBUG(
dbgs()
<< "LV: Invalidate candidate interleaved group due to gaps that "
"require a scalar epilogue (not allowed under optsize) and cannot "
"be masked (not enabled). \n");
releaseGroup(Group);
ReleasedGroup = true;
}
assert(ReleasedGroup && "At least one group must be invalidated, as a "
"scalar epilogue was required");
(void)ReleasedGroup;
RequiresScalarEpilogue = false;
}
template <typename InstT>
void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
llvm_unreachable("addMetadata can only be used for Instruction");
}
namespace llvm {
template <>
void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
SmallVector<Value *, 4> VL;
std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
[](std::pair<int, Instruction *> p) { return p.second; });
propagateMetadata(NewInst, VL);
}
}
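// For example, with ScalarName "sin", VectorName "vec_sin", one vector
// argument, and a fixed VF of 4, this produces a mangled name of the form
// "_ZGV<isa>N4v_sin(vec_sin)", where <isa> is the LLVM-internal ISA token.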
std::string VFABI::mangleTLIVectorName(StringRef VectorName,
StringRef ScalarName, unsigned numArgs,
ElementCount VF) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
Out << "_ZGV" << VFABI::_LLVM_ << "N";
if (VF.isScalable())
Out << 'x';
else
Out << VF.getFixedValue();
for (unsigned I = 0; I < numArgs; ++I)
Out << "v";
Out << "_" << ScalarName << "(" << VectorName << ")";
return std::string(Out.str());
}
void VFABI::getVectorVariantNames(
const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
if (S.empty())
return;
SmallVector<StringRef, 8> ListAttr;
S.split(ListAttr, ",");
for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
assert(Info && "Invalid name for a VFABI variant.");
assert(CI.getModule()->getFunction(Info.value().VectorName) &&
"Vector function is missing.");
#endif
VariantMappings.push_back(std::string(S));
}
}
bool VFShape::hasValidParameterList() const {
for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
++Pos) {
assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
switch (Parameters[Pos].ParamKind) {
default: // Nothing to check.
break;
case VFParamKind::OMP_Linear:
case VFParamKind::OMP_LinearRef:
case VFParamKind::OMP_LinearVal:
case VFParamKind::OMP_LinearUVal:
// Compile time linear steps must be non-zero.
if (Parameters[Pos].LinearStepOrPos == 0)
return false;
break;
case VFParamKind::OMP_LinearPos:
case VFParamKind::OMP_LinearRefPos:
case VFParamKind::OMP_LinearValPos:
case VFParamKind::OMP_LinearUValPos:
// The runtime linear step must be referring to some other
// parameters in the signature.
if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
return false;
// The linear step parameter must be marked as uniform.
if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
VFParamKind::OMP_Uniform)
return false;
// The linear step parameter can't point at itself.
if (Parameters[Pos].LinearStepOrPos == int(Pos))
return false;
break;
case VFParamKind::GlobalPredicate:
// The global predicate must be unique. It can be placed anywhere in the
// signature.
for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
return false;
break;
}
}
return true;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 35650b9bd00e..ecdaef0442da 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,11354 +1,11355 @@
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <iterator>
#include <limits>
#include <tuple>
using namespace llvm;
using namespace PatternMatch;
using namespace SwitchCG;
#define DEBUG_TYPE "isel"
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
static cl::opt<bool>
InsertAssertAlign("insert-assert-align", cl::init(true),
cl::desc("Insert the experimental `assertalign` node."),
cl::ReallyHidden);
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
cl::desc("Generate low-precision inline sequences "
"for some float libcalls"),
cl::location(LimitFloatPrecision), cl::Hidden,
cl::init(0));
static cl::opt<unsigned> SwitchPeelThreshold(
"switch-peel-threshold", cl::Hidden, cl::init(66),
cl::desc("Set the case probability threshold for peeling the case from a "
"switch statement. A value greater than 100 will void this "
"optimization"));
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
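/// For example, an i64 value passed as two i32 parts is reassembled with a
/// BUILD_PAIR node (swapping the two halves on big-endian targets).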
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CC = None,
Optional<ISD::NodeType> AssertOp = None) {
// Let the target assemble the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
PartVT, ValueVT, CC))
return Val;
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
CC);
assert(NumParts > 0 && "No parts to assemble!");
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
unsigned RoundParts =
(NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
SDValue Lo, Hi;
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT, V);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT, V);
} else {
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
OddVT, V, CC);
// Combine the round and odd parts.
Lo = Val;
if (DAG.getDataLayout().isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueSizeInBits(), DL,
TLI.getShiftAmountTy(
TotalVT, DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
"Unexpected split");
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
}
}
// There is now one part, held in Val. Correct it to match ValueVT.
// PartEVT is the type of the register class that holds the value.
// ValueVT is the type of the inline asm operation.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
ValueVT.bitsLT(PartEVT)) {
// For an FP value in an integer part, we need to truncate to the right
// width first.
PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
}
// Handle types that have the same size.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// Handle types with different sizes.
if (PartEVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartEVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp)
Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
DAG.getValueType(ValueVT));
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(
ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
// Handle MMX to a narrower integer type by bitcasting MMX to integer and
// then truncating.
if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
ValueVT.bitsLT(PartEVT)) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
const Twine &ErrMsg) {
const Instruction *I = dyn_cast_or_null<Instruction>(V);
if (!V)
return Ctx.emitError(ErrMsg);
const char *AsmError = ", possible invalid constraint for vector type";
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (CI->isInlineAsm())
return Ctx.emitError(I, ErrMsg + AsmError);
return Ctx.emitError(I, ErrMsg);
}
/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const bool IsABIRegCopy = CallConv.has_value();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), *CallConv, ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT.getSizeInBits() ==
Parts[0].getSimpleValueType().getSizeInBits() &&
"Part type sizes don't match!");
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
PartVT, IntermediateVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
// operands from the parts.
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT, V, CallConv);
}
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
EVT BuiltVectorTy =
IntermediateVT.isVector()
? EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(),
IntermediateVT.getVectorElementCount() * NumParts)
: EVT::getVectorVT(*DAG.getContext(),
IntermediateVT.getScalarType(),
NumIntermediates);
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
DL, BuiltVectorTy, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
EVT PartEVT = Val.getValueType();
if (PartEVT == ValueVT)
return Val;
if (PartEVT.isVector()) {
// Vector/Vector bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
PartEVT =
EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
ValueVT.getVectorElementCount());
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
DAG.getVectorIdxConstant(0, DL));
if (PartEVT == ValueVT)
return Val;
}
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
}
// Trivial bitcast if the types are the same size and the destination
// vector type is legal.
if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
if (ValueVT.getVectorNumElements() != 1) {
// Certain ABIs require that vectors are passed as integers. For vectors
// that are the same size, this is an obvious bitcast.
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
} else if (ValueVT.bitsLT(PartEVT)) {
const uint64_t ValueSize = ValueVT.getFixedSizeInBits();
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
// Drop the extra bits.
Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val);
return DAG.getBitcast(ValueVT, Val);
}
diagnosePossiblyInvalidConstraint(
*DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
// Handle cases such as i8 -> <1 x i1>
EVT ValueSVT = ValueVT.getVectorElementType();
if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) {
if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits())
Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val);
else
Val = ValueVT.isFloatingPoint()
? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
: DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
}
return DAG.getBuildVector(ValueVT, DL, Val);
}
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
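/// For example, splitting an i64 value into two i32 parts bitcasts the value
/// and then uses EXTRACT_ELEMENT to peel off the low and high halves
/// (reversed on big-endian targets).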
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
Optional<CallingConv::ID> CallConv = None,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
// Let the target split the parts if it wants to
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
CallConv))
return;
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
CallConv);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
"Copying to an illegal type!");
if (NumParts == 0)
return;
assert(!ValueVT.isVector() && "Vector case handled elsewhere");
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
assert(NumParts == 1 && "No-op copy with multiple parts!");
Parts[0] = Val;
return;
}
if (NumParts * PartBits > ValueVT.getSizeInBits()) {
// If the parts cover more bits than the value has, promote the value.
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
if (ValueVT.isFloatingPoint()) {
// FP values need to be bitcast, then extended if they are being put
// into a larger container.
ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartEVT != ValueVT);
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
if (PartVT == MVT::x86mmx)
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
ValueVT = Val.getValueType();
assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
"Failed to tile the value with PartVT!");
if (NumParts == 1) {
if (PartEVT != ValueVT) {
diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
"scalar-to-vector conversion failed");
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
Parts[0] = Val;
return;
}
// Expand the value into multiple parts.
if (NumParts & (NumParts - 1)) {
// The number of parts is not a power of 2. Split off and copy the tail.
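// For example, with NumParts == 3 and PartVT == i32 (a 96-bit value),
// RoundParts == 2: the high 32 bits are shifted down and copied out as the
// single odd part, and the remaining 64 bits are then bisected below.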
assert(PartVT.isInteger() && ValueVT.isInteger() &&
"Do not know what to expand to!");
unsigned RoundParts = 1 << Log2_32(NumParts);
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
DAG.getShiftAmountConstant(RoundBits, ValueVT, DL));
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
if (DAG.getDataLayout().isBigEndian())
// The odd parts were reversed by getCopyToParts - unreverse them.
std::reverse(Parts + RoundParts, Parts + NumParts);
NumParts = RoundParts;
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
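// For example, an i128 value with four i32 parts is first bisected into two
// i64 halves, and each half is then bisected into two i32 parts.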
Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
for (unsigned i = 0; i < NumParts; i += StepSize) {
unsigned ThisBits = StepSize * PartBits / 2;
EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
SDValue &Part0 = Parts[i];
SDValue &Part1 = Parts[i+StepSize/2];
Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
if (ThisBits == PartBits && ThisVT != PartVT) {
Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
if (DAG.getDataLayout().isBigEndian())
std::reverse(Parts, Parts + OrigNumParts);
}
static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
const SDLoc &DL, EVT PartVT) {
if (!PartVT.isVector())
return SDValue();
EVT ValueVT = Val.getValueType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
// We only support widening vectors with equivalent element types and
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
PartNumElts.isScalable() != ValueNumElts.isScalable() ||
PartVT.getVectorElementType() != ValueVT.getVectorElementType())
return SDValue();
// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
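// For example, <vscale x 2 x i32> is widened to <vscale x 4 x i32> by
// inserting it at index 0 of an undef <vscale x 4 x i32>.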
if (PartNumElts.isScalable())
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(Val, Ops);
SDValue EltUndef = DAG.getUNDEF(ElementVT);
Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
// FIXME: Use CONCAT for 2x -> 4x.
return DAG.getBuildVector(PartVT, DL, Ops);
}
/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
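/// For example, a <3 x float> value may be widened into a single <4 x float>
/// part, or an <8 x i32> value may be split into four <2 x i32> intermediate
/// operands that are then copied out one per register part.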
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
Optional<CallingConv::ID> CallConv) {
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const bool IsABIRegCopy = CallConv.has_value();
if (NumParts == 1) {
EVT PartEVT = PartVT;
if (PartEVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
Val = Widened;
} else if (PartVT.isVector() &&
PartEVT.getVectorElementType().bitsGE(
ValueVT.getVectorElementType()) &&
PartEVT.getVectorElementCount() ==
ValueVT.getVectorElementCount()) {
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
} else if (PartEVT.isVector() &&
PartEVT.getVectorElementType() !=
ValueVT.getVectorElementType() &&
TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
TargetLowering::TypeWidenVector) {
// Combination of widening and promotion.
EVT WidenVT =
EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
PartVT.getVectorElementCount());
SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getVectorIdxConstant(0, DL));
} else {
uint64_t ValueSize = ValueVT.getFixedSizeInBits();
assert(PartVT.getFixedSizeInBits() > ValueSize &&
"lossy conversion of vector to scalar type");
EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize);
Val = DAG.getBitcast(IntermediateType, Val);
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
}
}
assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
Parts[0] = Val;
return;
}
// Handle a multi-element vector.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs;
if (IsABIRegCopy) {
NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
*DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
} else {
NumRegs =
TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
NumIntermediates, RegisterVT);
}
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() &&
"Mixing scalable and fixed vectors when copying in parts");
Optional<ElementCount> DestEltCnt;
if (IntermediateVT.isVector())
DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates;
else
DestEltCnt = ElementCount::getFixed(NumIntermediates);
EVT BuiltVectorTy = EVT::getVectorVT(
*DAG.getContext(), IntermediateVT.getScalarType(), *DestEltCnt);
if (ValueVT == BuiltVectorTy) {
// Nothing to do.
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
} else {
if (BuiltVectorTy.getVectorElementType().bitsGT(
ValueVT.getVectorElementType())) {
// Integer promotion.
ValueVT = EVT::getVectorVT(*DAG.getContext(),
BuiltVectorTy.getVectorElementType(),
ValueVT.getVectorElementCount());
Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
Val = Widened;
}
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector()) {
// This does something sensible for scalable vectors - see the
// definition of EXTRACT_SUBVECTOR for further details.
unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements();
Ops[i] =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i * IntermediateNumElts, DL));
} else {
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
DAG.getVectorIdxConstant(i, DL));
}
}
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each intermediate value
// into legal parts.
assert(NumIntermediates != 0 && "division by zero");
assert(NumParts % NumIntermediates == 0 &&
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
CallConv);
}
}
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt, Optional<CallingConv::ID> CC)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
RegCount(1, regs.size()), CallConv(CC) {}
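// For example, for an i64 value on a 32-bit target this constructor records
// two consecutive i32 registers (Reg and Reg + 1), with RegCount[0] == 2.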
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
Optional<CallingConv::ID> CC) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
CallConv = CC;
for (EVT ValueVT : ValueVTs) {
unsigned NumRegs =
isABIMangled()
? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT)
: TLI.getNumRegisters(Context, ValueVT);
MVT RegisterVT =
isABIMangled()
? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT)
: TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
RegCount.push_back(NumRegs);
Reg += NumRegs;
}
}
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
SDValue *Flag, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Assemble the legal parts into the final values.
SmallVector<SDValue, 4> Values(ValueVTs.size());
SmallVector<SDValue, 8> Parts;
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
unsigned NumRegs = RegCount[Value];
MVT RegisterVT =
isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(), CallConv.value(), RegVTs[Value])
: RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
if (!Flag) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
*Flag = P.getValue(2);
}
Chain = P.getValue(1);
Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
const FunctionLoweringInfo::LiveOutInfo *LOI =
FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
if (!LOI)
continue;
unsigned RegSize = RegisterVT.getScalarSizeInBits();
unsigned NumSignBits = LOI->NumSignBits;
unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
if (NumZeroBits == RegSize) {
// The current value is a zero.
// Express that explicitly, since it makes it easier for
// optimizations to kick in.
Parts[i] = DAG.getConstant(0, dl, RegisterVT);
continue;
}
// FIXME: We capture more information than the dag can represent. For
// now, just use the tightest assertzext/assertsext possible.
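// For example, if a 32-bit register is known to have 24 leading zero bits,
// we emit AssertZext from i8; if it is known to have 25 sign bits (and no
// known leading zeros), we emit AssertSext from i8 instead.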
bool isSExt;
EVT FromVT(MVT::Other);
if (NumZeroBits) {
FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
isSExt = false;
} else if (NumSignBits > 1) {
FromVT =
EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
isSExt = true;
} else {
continue;
}
// Add an assertion node.
assert(FromVT != MVT::Other);
Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
RegisterVT, ValueVT, V, CallConv);
Part += NumRegs;
Parts.clear();
}
return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
const SDLoc &dl, SDValue &Chain, SDValue *Flag,
const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ISD::NodeType ExtendKind = PreferredExtendType;
// Get the list of the value's legal parts.
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumParts = RegCount[Value];
MVT RegisterVT =
isABIMangled() ? TLI.getRegisterTypeForCallingConv(
*DAG.getContext(), CallConv.value(), RegVTs[Value])
: RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
NumParts, RegisterVT, V, CallConv, ExtendKind);
Part += NumParts;
}
// Copy the parts into the registers.
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
if (!Flag) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
*Flag = Part.getValue(1);
}
Chains[i] = Part.getValue(0);
}
if (NumRegs == 1 || Flag)
// If NumRegs > 1 && Flag is used, then the use of the last CopyToReg is
// flagged to it. That is, the CopyToReg nodes and the user are considered
// a single scheduling unit. If we create a TokenFactor and return it as
// chain, then the TokenFactor is both a predecessor (operand) of the
// user as well as a successor (the TF operands are flagged to the user).
// c1, f1 = CopyToReg
// c2, f2 = CopyToReg
// c3 = TokenFactor c1, c2
// ...
// = op c3, ..., f2
Chain = Chains[NumRegs-1];
else
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned MatchingIdx, const SDLoc &dl,
SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
// Don't do this for tied operands that can use the regclass information
// from the def.
const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
}
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
if (Code == InlineAsm::Kind_Clobber) {
// Clobbers should always have a 1:1 mapping with registers, and may
// reference registers that have illegal (e.g. vector) types. Hence, we
// shouldn't try to apply any sort of splitting logic to them.
assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
"No 1:1 mapping from clobbers to regs?");
Register SP = TLI.getStackPointerRegisterToSaveRestore();
(void)SP;
for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
assert(
(Regs[I] != SP ||
DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
"If we clobbered the stack pointer, MFI should know about it.");
}
return;
}
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
MVT RegisterVT = RegVTs[Value];
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value],
RegisterVT);
for (unsigned i = 0; i != NumRegs; ++i) {
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
}
}
}
SmallVector<std::pair<unsigned, TypeSize>, 4>
RegsForValue::getRegsAndSizes() const {
SmallVector<std::pair<unsigned, TypeSize>, 4> OutVec;
unsigned I = 0;
for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
unsigned RegCount = std::get<0>(CountAndVT);
MVT RegisterVT = std::get<1>(CountAndVT);
TypeSize RegisterSize = RegisterVT.getSizeInBits();
for (unsigned E = I + RegCount; I != E; ++I)
OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
}
return OutVec;
}
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
const TargetLibraryInfo *li) {
AA = aa;
GFI = gfi;
LibInfo = li;
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
void SelectionDAGBuilder::clear() {
NodeMap.clear();
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
StatepointLowering.clear();
}
void SelectionDAGBuilder::clearDanglingDebugInfo() {
DanglingDebugInfoMap.clear();
}
// Update DAG root to include dependencies on Pending chains.
SDValue SelectionDAGBuilder::updateRoot(SmallVectorImpl<SDValue> &Pending) {
SDValue Root = DAG.getRoot();
if (Pending.empty())
return Root;
// Add current root to PendingChains, unless we already indirectly
// depend on it.
if (Root.getOpcode() != ISD::EntryToken) {
unsigned i = 0, e = Pending.size();
for (; i != e; ++i) {
assert(Pending[i].getNode()->getNumOperands() > 1);
if (Pending[i].getNode()->getOperand(0) == Root)
break; // Don't add the root if we already indirectly depend on it.
}
if (i == e)
Pending.push_back(Root);
}
if (Pending.size() == 1)
Root = Pending[0];
else
Root = DAG.getTokenFactor(getCurSDLoc(), Pending);
DAG.setRoot(Root);
Pending.clear();
return Root;
}
SDValue SelectionDAGBuilder::getMemoryRoot() {
return updateRoot(PendingLoads);
}
SDValue SelectionDAGBuilder::getRoot() {
// Chain up all pending constrained intrinsics together with all
// pending loads, by simply appending them to PendingLoads and
// then calling getMemoryRoot().
PendingLoads.reserve(PendingLoads.size() +
PendingConstrainedFP.size() +
PendingConstrainedFPStrict.size());
PendingLoads.append(PendingConstrainedFP.begin(),
PendingConstrainedFP.end());
PendingLoads.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFP.clear();
PendingConstrainedFPStrict.clear();
return getMemoryRoot();
}
SDValue SelectionDAGBuilder::getControlRoot() {
// We need to emit pending fpexcept.strict constrained intrinsics,
// so append them to the PendingExports list.
PendingExports.append(PendingConstrainedFPStrict.begin(),
PendingConstrainedFPStrict.end());
PendingConstrainedFPStrict.clear();
return updateRoot(PendingExports);
}
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
HandlePHINodesInSuccessorBlocks(I.getParent());
}
// Increase the SDNodeOrder if dealing with a non-debug instruction.
if (!isa<DbgInfoIntrinsic>(I))
++SDNodeOrder;
CurInst = &I;
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
!isa<GCStatepointInst>(I)) // statepoints handle their exports internally
CopyToExportRegsIfNeeded(&I);
CurInst = nullptr;
}
void SelectionDAGBuilder::visitPHI(const PHINode &) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}
void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
default: llvm_unreachable("Unknown instruction type encountered!");
// Build the switch statement using the Instruction.def file.
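// Each HANDLE_INST expands to a case such as:
// case Instruction::Add: visitAdd((const BinaryOperator&)I); break;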
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
}
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
DebugLoc DL, unsigned Order) {
// We treat variadic dbg_values differently at this stage.
if (DI->hasArgList()) {
// For variadic dbg_values we will now insert an undef.
// FIXME: We can potentially recover these!
SmallVector<SDDbgOperand, 2> Locs;
for (const Value *V : DI->getValues()) {
auto Undef = UndefValue::get(V->getType());
Locs.push_back(SDDbgOperand::fromConst(Undef));
}
SDDbgValue *SDV = DAG.getDbgValueList(
DI->getVariable(), DI->getExpression(), Locs, {},
/*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
} else {
// TODO: Dangling debug info will eventually either be resolved or produce
// an Undef DBG_VALUE. However, in the resolution case, a gap may appear
// between the original dbg.value location and its resolved DBG_VALUE,
// which we should ideally fill with an extra Undef DBG_VALUE.
assert(DI->getNumVariableLocationOps() == 1 &&
"DbgValueInst without an ArgList should have a single location "
"operand.");
DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order);
}
}
void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr) {
auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
const DbgValueInst *DI = DDI.getDI();
DIVariable *DanglingVariable = DI->getVariable();
DIExpression *DanglingExpr = DI->getExpression();
if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
return true;
}
return false;
};
for (auto &DDIMI : DanglingDebugInfoMap) {
DanglingDebugInfoVector &DDIV = DDIMI.second;
// If debug info is to be dropped, run it through final checks to see
// whether it can be salvaged.
for (auto &DDI : DDIV)
if (isMatchingDbgValue(DDI))
salvageUnresolvedDbgValue(DDI);
erase_if(DDIV, isMatchingDbgValue);
}
}
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
SDValue Val) {
auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
return;
DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
for (auto &DDI : DDIV) {
const DbgValueInst *DI = DDI.getDI();
assert(DI && "Ill-formed DanglingDebugInfo");
assert(!DI->hasArgList() && "Not implemented for variadic dbg_values");
DebugLoc dl = DDI.getdl();
unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
DILocalVariable *Variable = DI->getVariable();
DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
// FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
// FuncArgumentDbgValue (it would be hoisted to the function entry, and if
// we couldn't resolve it directly when examining the DbgValue intrinsic
// in the first place we should not be more successful here). Unless we
// have some test case that proves this to be correct, we should avoid
// calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl,
FuncArgumentDbgValueKind::Value, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
// Increase the SDNodeOrder for the DbgValue here to make sure it is
// inserted after the definition of Val when emitting the instructions
// after ISel. An alternative could be to teach
// ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
<< "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
<< ValSDNodeOrder << "\n");
SDV = getDbgValue(Val, Variable, Expr, dl,
std::max(DbgSDNodeOrder, ValSDNodeOrder));
DAG.AddDbgValue(SDV, false);
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
} else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV =
DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, false);
}
}
DDIV.clear();
}
void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
// TODO: For the variadic implementation, instead of only checking the fail
// state of `handleDebugValue`, we need to know specifically which values were
// invalid, so that we attempt to salvage only those values when processing
// a DIArgList.
assert(!DDI.getDI()->hasArgList() &&
"Not implemented for variadic dbg_values");
Value *V = DDI.getDI()->getValue(0);
DILocalVariable *Var = DDI.getDI()->getVariable();
DIExpression *Expr = DDI.getDI()->getExpression();
DebugLoc DL = DDI.getdl();
DebugLoc InstDL = DDI.getDI()->getDebugLoc();
unsigned SDOrder = DDI.getSDNodeOrder();
// Currently we consider only dbg.value intrinsics -- we tell the salvager
// that DW_OP_stack_value is desired.
assert(isa<DbgValueInst>(DDI.getDI()));
bool StackValue = true;
// Can this Value be encoded without any further work?
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false))
return;
// Attempt to salvage back through as many instructions as possible. Bail if
// a non-instruction is seen, such as a constant expression or global
// variable. FIXME: Further work could recover those too.
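// For example, if V is "%a = add i64 %b, 1", the salvager can rewrite the
// expression in terms of %b by appending DW_OP_plus_uconst 1, and we then
// retry with %b as the location operand.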
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
if (!V)
break;
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
if (!AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder,
/*IsVariadic=*/false)) {
LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
<< *DDI.getDI() << "\nBy stripping back to:\n " << *V);
return;
}
}
// This was the final opportunity to salvage this debug information, and it
// couldn't be done. Place an undef DBG_VALUE at this location to terminate
// any earlier variable location.
auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType());
auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI()
<< "\n");
LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
<< "\n");
}
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
DIExpression *Expr, DebugLoc dl,
DebugLoc InstDL, unsigned Order,
bool IsVariadic) {
if (Values.empty())
return true;
SmallVector<SDDbgOperand> LocationOps;
SmallVector<SDNode *> Dependencies;
for (const Value *V : Values) {
// Constant value.
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
isa<ConstantPointerNull>(V)) {
LocationOps.emplace_back(SDDbgOperand::fromConst(V));
continue;
}
// If the Value is a frame index, we can create a FrameIndex debug value
// without relying on the DAG at all.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
auto SI = FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(SI->second));
continue;
}
}
// Do not use getValue() in here; we don't want to generate code at
// this point if it hasn't been done yet.
SDValue N = NodeMap[V];
if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
// Only emit func arg dbg value for non-variadic dbg.values for now.
if (!IsVariadic &&
EmitFuncArgumentDbgValue(V, Var, Expr, dl,
FuncArgumentDbgValueKind::Value, N))
return true;
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can
// describe stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of
// interesting debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
Dependencies.push_back(N.getNode());
LocationOps.emplace_back(SDDbgOperand::fromFrameIdx(FISDN->getIndex()));
continue;
}
LocationOps.emplace_back(
SDDbgOperand::fromNode(N.getNode(), N.getResNo()));
continue;
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Special rules apply for the first dbg.values of parameter variables in a
// function. Identify them by the fact that they reference Argument Values,
// that the variables they describe are parameters, and that those parameters
// belong to the current function. We need to let them dangle until they get
// an SDNode.
bool IsParamOfFunc =
isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt();
if (IsParamOfFunc)
return false;
// The value is not used in this block yet (or it would have an SDNode).
// We still want the value to appear for the user if possible -- if it has
// an associated VReg, we can refer to that instead.
auto VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
unsigned Reg = VMI->second;
// If this is a PHI node, it may be split up into several MI PHI nodes
// (in FunctionLoweringInfo::set).
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
// FIXME: We could potentially support variadic dbg_values here.
if (IsVariadic)
return false;
unsigned Offset = 0;
unsigned BitsToDescribe = 0;
if (auto VarSize = Var->getSizeInBits())
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
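// For example, a 64-bit variable living in two 32-bit registers is emitted
// below as two DBG_VALUEs with fragment expressions covering bits [0, 32)
// and [32, 64).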
for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
// TODO: handle scalable vectors.
unsigned RegisterSize = RegAndSize.second;
unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
? BitsToDescribe - Offset
: RegisterSize;
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, FragmentSize);
if (!FragmentExpr)
continue;
SDDbgValue *SDV = DAG.getVRegDbgValue(
Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
Offset += RegisterSize;
}
return true;
}
// We can use simple vreg locations for variadic dbg_values as well.
LocationOps.emplace_back(SDDbgOperand::fromVReg(Reg));
continue;
}
// We failed to create a SDDbgOperand for V.
return false;
}
// We have created a SDDbgOperand for each Value in Values.
// TODO: Should this use Order instead of SDNodeOrder?
assert(!LocationOps.empty());
SDDbgValue *SDV =
DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies,
/*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic);
DAG.AddDbgValue(SDV, /*isParameter=*/false);
return true;
}
void SelectionDAGBuilder::resolveOrClearDbgInfo() {
// Try to fixup any remaining dangling debug info -- and drop it if we can't.
for (auto &Pair : DanglingDebugInfoMap)
for (auto &DDI : Pair.second)
salvageUnresolvedDbgValue(DDI);
clearDanglingDebugInfo();
}
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V);
SDValue Result;
if (It != FuncInfo.ValueMap.end()) {
Register InReg = It->second;
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), InReg, Ty,
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
V);
resolveDanglingDebugInfo(V, Result);
}
return Result;
}
/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
// to do this first, so that we don't create a CopyFromReg if we already
// have a regular SDValue.
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
// If there's a virtual register allocated and initialized for this
// value, use it.
if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
return copyFromReg;
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) {
if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
// Remove the debug location from the node as the node is about to be used
// in a location which may differ from the original debug location. This
// is relevant to Constant and ConstantFP nodes because they can appear
// as constant expressions inside PHI nodes.
N->setDebugLoc(DebugLoc());
}
return N;
}
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
resolveDanglingDebugInfo(V, Val);
return Val;
}
/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, getCurSDLoc(), VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
return DAG.getConstant(0, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
if (match(C, m_VScale(DAG.getDataLayout())))
return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
return DAG.getUNDEF(VT);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
SDValue N1 = NodeMap[V];
assert(N1.getNode() && "visit didn't populate the NodeMap!");
return N1;
}
if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
SmallVector<SDValue, 4> Constants;
for (const Use &U : C->operands()) {
SDNode *Val = getValue(U).getNode();
// If the operand is an empty aggregate, there are no values.
if (!Val) continue;
// Add each leaf value from the operand to the Constants list
// to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Constants.push_back(SDValue(Val, i));
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(C)) {
SmallVector<SDValue, 4> Ops;
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
// Add each leaf value from the operand to the Constants list
// to form a flattened list of all the values.
for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
Ops.push_back(SDValue(Val, i));
}
if (isa<ArrayType>(CDS->getType()))
return DAG.getMergeValues(Ops, getCurSDLoc());
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
unsigned NumElts = ValueVTs.size();
if (NumElts == 0)
return SDValue(); // empty struct
SmallVector<SDValue, 4> Constants(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
EVT EltVT = ValueVTs[i];
if (isa<UndefValue>(C))
Constants[i] = DAG.getUNDEF(EltVT);
else if (EltVT.isFloatingPoint())
Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
}
return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
return DAG.getBlockAddress(BA, VT);
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(C))
return getValue(Equiv->getGlobalValue());
if (const auto *NC = dyn_cast<NoCFIValue>(C))
return getValue(NC->getGlobalValue());
VectorType *VecTy = cast<VectorType>(V->getType());
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
SmallVector<SDValue, 16> Ops;
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
for (unsigned i = 0; i != NumElements; ++i)
Ops.push_back(getValue(CV->getOperand(i)));
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
if (isa<ConstantAggregateZero>(C)) {
EVT EltVT =
TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
SDValue Op;
if (EltVT.isFloatingPoint())
Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
else
Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
if (isa<ScalableVectorType>(VecTy))
return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op);
SmallVector<SDValue, 16> Ops;
Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op);
return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
}
llvm_unreachable("Unknown vector constant");
}
// If this is a static alloca, generate it as the frameindex instead of
// an address computation.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end())
return DAG.getFrameIndex(SI->second,
TLI.getFrameIndexTy(DAG.getDataLayout()));
}
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
Inst->getType(), None);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V))
return DAG.getMDNode(cast<MDNode>(MD->getMetadata()));
if (const auto *BB = dyn_cast<BasicBlock>(V))
return DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
llvm_unreachable("Can't get register for value!");
}
void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
bool IsSEH = isAsynchronousEHPersonality(Pers);
MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
if (!IsSEH)
CatchPadMBB->setIsEHScopeEntry();
// In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
CatchPadMBB->setIsEHFuncletEntry();
}
void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Update machine-CFG edge.
MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
FuncInfo.MBB->addSuccessor(TargetMBB);
TargetMBB->setIsEHCatchretTarget(true);
DAG.getMachineFunction().setHasEHCatchret(true);
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsSEH = isAsynchronousEHPersonality(Pers);
if (IsSEH) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (TargetMBB != NextBlock(FuncInfo.MBB) ||
TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB)));
return;
}
// Figure out the funclet membership for the catchret's successor.
// This will be used by the FuncletLayout pass to determine how to order the
// BBs.
// A 'catchret' returns to the outer scope's color.
Value *ParentPad = I.getCatchSwitchParentPad();
const BasicBlock *SuccessorColor;
if (isa<ConstantTokenNone>(ParentPad))
SuccessorColor = &FuncInfo.Fn->getEntryBlock();
else
SuccessorColor = cast<Instruction>(ParentPad)->getParent();
assert(SuccessorColor && "No parent funclet for catchret!");
MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
// Create the terminator node.
SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(TargetMBB),
DAG.getBasicBlock(SuccessorColorMBB));
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
// Don't emit any special code for the cleanuppad instruction. It just marks
// the start of an EH scope/funclet.
FuncInfo.MBB->setIsEHScopeEntry();
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
if (Pers != EHPersonality::Wasm_CXX) {
FuncInfo.MBB->setIsEHFuncletEntry();
FuncInfo.MBB->setIsCleanupFuncletEntry();
}
}
// In wasm EH, even though a catchpad may not catch an exception if a tag does
// not match, it is OK to add only the first unwind destination catchpad to the
// successors, because there will be at least one invoke instruction within the
// catch scope that points to the next unwind destination, if one exists, so
// CFGSort cannot mess up the BB sorting order.
// (All catchpads with 'catch (type)' clauses have a 'llvm.rethrow' intrinsic
// call within them, and catchpads only consisting of 'catch (...)' have a
// '__cxa_end_catch' call within them, both of which generate invokes in case
// the next unwind destination exists, i.e., the next unwind destination is not
// the caller.)
//
// Having at most one EH pad successor is also simpler and helps later
// transformations.
//
// For example,
// current:
// invoke void @foo to ... unwind label %catch.dispatch
// catch.dispatch:
// %0 = catchswitch within ... [label %catch.start] unwind label %next
// catch.start:
// ...
// ... in this BB or some other child BB dominated by this BB there will be an
// invoke that points to 'next' BB as an unwind destination
//
// next: ; We don't need to add this to 'current' BB's successor
// ...
static void findWasmUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
break;
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations. We don't
// continue to the unwind destination of the catchswitch for wasm.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
}
break;
} else {
continue;
}
}
}
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
BranchProbability Prob,
SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
&UnwindDests) {
EHPersonality Personality =
classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
if (IsWasmCXX) {
findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
assert(UnwindDests.size() <= 1 &&
"There should be at most one unwind destination for wasm");
return;
}
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
BasicBlock *NewEHPadBB = nullptr;
if (isa<LandingPadInst>(Pad)) {
// Stop on landingpads. They are not funclets.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
break;
} else if (isa<CleanupPadInst>(Pad)) {
// Stop on cleanup pads. Cleanups are always funclet entries for all known
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
// For MSVC++ and the CLR, catchblocks are funclets and need prologues.
if (IsMSVCCXX || IsCoreCLR)
UnwindDests.back().first->setIsEHFuncletEntry();
if (!IsSEH)
UnwindDests.back().first->setIsEHScopeEntry();
}
NewEHPadBB = CatchSwitch->getUnwindDest();
} else {
continue;
}
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (BPI && NewEHPadBB)
Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
EHPadBB = NewEHPadBB;
}
}
void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
// Update successor info.
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
auto UnwindDest = I.getUnwindDest();
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability UnwindDestProb =
(BPI && UnwindDest)
? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
}
FuncInfo.MBB->normalizeSuccProbs();
// Create the terminator node.
SDValue Ret =
DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
DAG.setRoot(Ret);
}
void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
report_fatal_error("visitCatchSwitch not yet implemented!");
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
SmallVector<SDValue, 8> OutVals;
// Calls to @llvm.experimental.deoptimize don't generate a return value, so
// lower
//
// %val = call <ty> @llvm.experimental.deoptimize()
// ret <ty> %val
//
// differently.
if (I.getParent()->getTerminatingDeoptimizeCall()) {
LowerDeoptimizingReturn();
return;
}
if (!FuncInfo.CanLowerReturn) {
unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
ComputeValueVTs(TLI, DL,
F->getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr =
DAG.getCopyFromReg(Chain, getCurSDLoc(), DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
&Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType());
for (unsigned i = 0; i != NumValues; ++i) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr,
TypeSize::Fixed(Offsets[i]));
SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(
Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()),
commonAlignment(BaseAlign, Offsets[i]));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
const Function *F = I.getParent()->getParent();
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
I.getOperand(0)->getType(), F->getCallingConv(),
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
CallingConv::ID CC = F->getCallingConv();
unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
if (RetInReg)
Flags.setInReg();
if (I.getOperand(0)->getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
}
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (j == NumValues - 1)
Flags.setInConsecutiveRegsLast();
}
// Propagate extension type if any
if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
else if (ExtendKind == ISD::ZERO_EXTEND)
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags,
Parts[i].getValueType().getSimpleVT(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
}
}
}
// Push in swifterror virtual register as the last element of Outs. This makes
// sure swifterror virtual register will be returned in the swifterror
// physical register.
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(
Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
/*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
&I, FuncInfo.MBB, SwiftError.getFunctionArg()),
EVT(TLI.getPointerTy(DL))));
}
bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction().getCallingConv();
Chain = DAG.getTargetLoweringInfo().LowerReturn(
Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
"LowerReturn didn't return a valid chain!");
// Update the DAG with the new chain value resulting from return lowering.
DAG.setRoot(Chain);
}
/// CopyToExportRegsIfNeeded - If the given value has virtual registers
/// created for it, emit nodes to copy the value into the virtual
/// registers.
void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
// Skip empty types
if (V->getType()->isEmptyTy())
return;
DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
assert(!V->use_empty() && "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
}
}
/// ExportFromCurrentBlock - If this condition isn't known to be exported from
/// the current basic block, add it to ValueMap now so that we'll get a
/// CopyTo/FromReg.
void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
// No need to export constants.
if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
// Already exported?
if (FuncInfo.isExportedInst(V)) return;
unsigned Reg = FuncInfo.InitializeRegForValue(V);
CopyValueToVirtualRegister(V, Reg);
}
bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
const BasicBlock *FromBB) {
// The operands of the setcc have to be in this block. We don't know
// how to export them from some other block.
if (const Instruction *VI = dyn_cast<Instruction>(V)) {
// Can export from current BB.
if (VI->getParent() == FromBB)
return true;
// Is already exported, noop.
return FuncInfo.isExportedInst(V);
}
// If this is an argument, we can export it if the BB is the entry block or
// if it is already exported.
if (isa<Argument>(V)) {
if (FromBB->isEntryBlock())
return true;
// Otherwise, can only export this if it is already exported.
return FuncInfo.isExportedInst(V);
}
// Otherwise, constants can always be exported.
return true;
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
BranchProbability
SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
const BasicBlock *SrcBB = Src->getBasicBlock();
const BasicBlock *DstBB = Dst->getBasicBlock();
if (!BPI) {
// If BPI is not available, set the default probability to 1 / N, where N is
// the number of successors.
auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
return BranchProbability(1, SuccSize);
}
return BPI->getEdgeProbability(SrcBB, DstBB);
}
void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
MachineBasicBlock *Dst,
BranchProbability Prob) {
if (!FuncInfo.BPI)
Src->addSuccessorWithoutProb(Dst);
else {
if (Prob.isUnknown())
Prob = getEdgeProbability(Src, Dst);
Src->addSuccessor(Dst, Prob);
}
}
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
return true;
}
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
const BasicBlock *BB = CurBB->getBasicBlock();
// If the leaf of the tree is a comparison, merge the condition into
// the caseblock.
if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
// The operands of the cmp have to be in this block. We don't know
// how to export them from some other block. If this is the first block
// of the sequence, no exporting is needed.
if (CurBB == SwitchBB ||
(isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
ISD::CondCode Condition;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred =
InvertCond ? IC->getInversePredicate() : IC->getPredicate();
Condition = getICmpCondCode(Pred);
} else {
const FCmpInst *FC = cast<FCmpInst>(Cond);
FCmpInst::Predicate Pred =
InvertCond ? FC->getInversePredicate() : FC->getPredicate();
Condition = getFCmpCondCode(Pred);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
return;
}
}
// Create a CaseBlock record representing this branch.
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
MachineBasicBlock *CurBB,
MachineBasicBlock *SwitchBB,
Instruction::BinaryOps Opc,
BranchProbability TProb,
BranchProbability FProb,
bool InvertCond) {
// Skip over NOTs that are not part of the tree, and remember to invert the op
// and operands at the next level.
Value *NotCond;
if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
InBlock(NotCond, CurBB->getBasicBlock())) {
FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
!InvertCond);
return;
}
const Instruction *BOp = dyn_cast<Instruction>(Cond);
const Value *BOpOp0, *BOpOp1;
// Compute the effective opcode for Cond, taking into account whether it needs
// to be inverted, e.g.
// and (not (or A, B)), C
// gets lowered as
// and (and (not A, not B), C)
Instruction::BinaryOps BOpc = (Instruction::BinaryOps)0;
if (BOp) {
BOpc = match(BOp, m_LogicalAnd(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::And
: (match(BOp, m_LogicalOr(m_Value(BOpOp0), m_Value(BOpOp1)))
? Instruction::Or
: (Instruction::BinaryOps)0);
if (InvertCond) {
if (BOpc == Instruction::And)
BOpc = Instruction::Or;
else if (BOpc == Instruction::Or)
BOpc = Instruction::And;
}
}
// If this node is not part of the or/and tree, emit it as a branch.
// Note that all nodes in the tree should have the same opcode.
bool BOpIsInOrAndTree = BOpc && BOpc == Opc && BOp->hasOneUse();
if (!BOpIsInOrAndTree || BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOpOp0, CurBB->getBasicBlock()) ||
!InBlock(BOpOp1, CurBB->getBasicBlock())) {
EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
TProb, FProb, InvertCond);
return;
}
// Create TmpBB after CurBB.
MachineFunction::iterator BBI(CurBB);
MachineFunction &MF = DAG.getMachineFunction();
MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
CurBB->getParent()->insert(++BBI, TmpBB);
if (Opc == Instruction::Or) {
// Codegen X | Y as:
// BB1:
// jmp_if_X TBB
// jmp TmpBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
// = TrueProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
// A/(1+B) and 2B/(1+B). This choice assumes that
// TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
// Another choice is to assume TrueProb for BB1 equals to TrueProb for
// TmpBB, but the math is more complicated.
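// As an illustrative worked example (numbers not from the source): with
// TProb = A = 3/4 and FProb = B = 1/4, BB1 gets {A/2, A/2 + B} = {3/8, 5/8},
// and TmpBB's raw pair {A/2, B} = {3/8, 1/4} normalizes to {3/5, 2/5}, i.e.
// A/(1+B) and 2B/(1+B). Check: 3/8 + 5/8 * 3/5 = 3/4 = A, as required.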
auto NewTrueProb = TProb / 2;
auto NewFalseProb = TProb / 2 + FProb;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TBB, TmpBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
} else {
assert(Opc == Instruction::And && "Unknown merge op!");
// Codegen X & Y as:
// BB1:
// jmp_if_X TmpBB
// jmp FBB
// TmpBB:
// jmp_if_Y TBB
// jmp FBB
//
// This requires creation of TmpBB after CurBB.
// We have flexibility in setting Prob for BB1 and Prob for TmpBB.
// The requirement is that
// FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
// = FalseProb for original BB.
// Assuming the original probabilities are A and B, one choice is to set
// BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
// 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
// TrueProb for BB1 * FalseProb for TmpBB.
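// As an illustrative worked example (numbers not from the source): with
// TProb = A = 1/2 and FProb = B = 1/2, BB1 gets {A + B/2, B/2} = {3/4, 1/4},
// and TmpBB's raw pair {A, B/2} = {1/2, 1/4} normalizes to {2/3, 1/3}, i.e.
// 2A/(1+A) and B/(1+A). Check: 1/4 + 3/4 * 1/3 = 1/2 = B, as required.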
auto NewTrueProb = TProb + FProb / 2;
auto NewFalseProb = FProb / 2;
// Emit the LHS condition.
FindMergedConditions(BOpOp0, TmpBB, FBB, CurBB, SwitchBB, Opc, NewTrueProb,
NewFalseProb, InvertCond);
// Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
// Emit the RHS condition into TmpBB.
FindMergedConditions(BOpOp1, TBB, FBB, TmpBB, SwitchBB, Opc, Probs[0],
Probs[1], InvertCond);
}
}
/// If the set of cases should be emitted as a series of branches, return true.
/// If we should emit this as a bunch of and/or'd together conditions, return
/// false.
bool
SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
if (Cases.size() != 2) return true;
// If this is two comparisons of the same values or'd or and'd together, they
// will get folded into a single comparison, so don't emit two blocks.
if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
Cases[0].CmpRHS == Cases[1].CmpRHS) ||
(Cases[0].CmpRHS == Cases[1].CmpLHS &&
Cases[0].CmpLHS == Cases[1].CmpRHS)) {
return false;
}
// Handle: (X != null) | (Y != null) --> (X|Y) != 0
// Handle: (X == null) & (Y == null) --> (X|Y) == 0
if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
Cases[0].CC == Cases[1].CC &&
isa<Constant>(Cases[0].CmpRHS) &&
cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
return false;
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
return true;
}
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
MachineBasicBlock *BrMBB = FuncInfo.MBB;
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
if (I.isUnconditional()) {
// Update machine-CFG edges.
BrMBB->addSuccessor(Succ0MBB);
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Succ0MBB)));
return;
}
// If this condition is one of the special cases we handle, lower it now.
const Value *CondVal = I.getCondition();
MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
// As long as jumps are not expensive (exceptions for multi-use logic ops,
// unpredictable branches, and vector extracts because those jumps are likely
// expensive for any target), this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
// cmp D, E
// F = setle
// or C, F
// jnz foo
// Emit:
// cmp A, B
// je foo
// cmp D, E
// jle foo
const Instruction *BOp = dyn_cast<Instruction>(CondVal);
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp &&
BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable)) {
Value *Vec;
const Value *BOp0, *BOp1;
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)0;
if (match(BOp, m_LogicalAnd(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::And;
else if (match(BOp, m_LogicalOr(m_Value(BOp0), m_Value(BOp1))))
Opcode = Instruction::Or;
if (Opcode && !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode,
getEdgeProbability(BrMBB, Succ0MBB),
getEdgeProbability(BrMBB, Succ1MBB),
/*InvertCond=*/false);
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
if (ShouldEmitAsBranches(SL->SwitchCases)) {
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
}
// Emit the branch for this block.
visitSwitchCase(SL->SwitchCases[0], BrMBB);
SL->SwitchCases.erase(SL->SwitchCases.begin());
return;
}
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
SL->SwitchCases.clear();
}
}
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
visitSwitchCase(CB, BrMBB);
}
/// visitSwitchCase - Emits the necessary code to represent a single node in
/// the binary search tree resulting from lowering a switch instruction.
void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
SDLoc dl = CB.DL;
if (CB.CC == ISD::SETTRUE) {
// Branch or fall through to TrueBB.
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
SwitchBB->normalizeSuccProbs();
if (CB.TrueBB != NextBlock(SwitchBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
DAG.getBasicBlock(CB.TrueBB)));
}
return;
}
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
// Build the setcc now.
if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
// handle common cases produced by branch lowering.
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
CB.CC == ISD::SETEQ)
Cond = CondLHS;
else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
} else {
SDValue CondRHS = getValue(CB.CmpRHS);
// If a pointer's DAG type is larger than its memory type then the DAG
// values are zero-extended. This breaks signed comparisons so truncate
// back to the underlying type before doing the compare.
if (CondLHS.getValueType() != MemVT) {
CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
}
Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
}
} else {
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
SDValue CmpOp = getValue(CB.CmpMHS);
EVT VT = CmpOp.getValueType();
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
ISD::SETLE);
} else {
SDValue SUB = DAG.getNode(ISD::SUB, dl,
VT, CmpOp, DAG.getConstant(Low, dl, VT));
Cond = DAG.getSetCC(dl, MVT::i1, SUB,
DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
}
}
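// The subtraction above is the usual unsigned range-check trick: for
// constants Low <= High, "Low <= X && X <= High" holds exactly when
// "(X - Low) u<= (High - Low)". As a hypothetical example, the range check
// 5 <= X <= 12 becomes the single unsigned compare (X - 5) u<= 7.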
// Update successor info
addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
// TrueBB and FalseBB are always different unless the incoming IR is
// degenerate. This only happens when running llc on weird IR.
if (CB.TrueBB != CB.FalseBB)
addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
SwitchBB->normalizeSuccProbs();
// If the lhs block is the next block, invert the condition so that we can
// fall through to the lhs instead of the rhs block.
if (CB.TrueBB == NextBlock(SwitchBB)) {
std::swap(CB.TrueBB, CB.FalseBB);
SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
}
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
// Insert the false branch. Do this even if it's a fall through branch,
// this makes it easier to do DAG optimizations which require inverting
// the branch condition.
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
/// visitJumpTable - Emit the JumpTable node in the current MBB.
void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
JT.Reg, PTy);
SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
MVT::Other, Index.getValue(1),
Table, Index);
DAG.setRoot(BrJumpTable);
}
/// visitJumpTableHeader - This function emits the code needed to produce the
/// jump table index from the switch case value.
void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
DAG.getConstant(JTH.First, dl, VT));
// The SDNode we just created, which holds the value being switched on minus
// the smallest case value, needs to be copied to a virtual register so it
// can be used as an index into the jump table in a subsequent basic block.
// This value may be smaller or larger than the target's pointer type, and
// therefore may require extension or truncation.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
unsigned JumpTableReg =
FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
if (!JTH.FallthroughUnreachable) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
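// As a hypothetical example: for case values {10..14}, Sub = X - 10 and the
// check is Sub u> 4; any X < 10 also branches to the default block because
// the subtraction wraps around to a large unsigned value.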
SDValue CMP = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
Sub.getValueType()),
Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, CopyTo, CMP,
DAG.getBasicBlock(JT.Default));
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
DAG.getBasicBlock(JT.MBB));
DAG.setRoot(BrCond);
} else {
// Avoid emitting unnecessary branches to the next block.
if (JT.MBB != NextBlock(SwitchBB))
DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
DAG.getBasicBlock(JT.MBB)));
else
DAG.setRoot(CopyTo);
}
}
/// Create a LOAD_STACK_GUARD node, and let it carry the target-specific global
/// variable if one exists.
static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
MachinePointerInfo MPInfo(Global);
auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef = MF.getMachineMemOperand(
MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
if (PtrTy != PtrMemTy)
return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
return SDValue(Node, 0);
}
/// Codegen a new tail for a stack protector check ParentMBB which has had its
/// tail spliced into a stack protector check success BB.
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB) {
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
SDValue Guard;
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
const Module &M = *ParentBB->getParent()->getFunction().getParent();
Align Align =
DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
if (TLI.useStackGuardXorFP())
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
// Retrieve guard check function, nullptr if instrumentation is inlined.
if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
// Generate a call to that function with the content of the guard slot as
// argument.
FunctionType *FnTy = GuardCheckFn->getFunctionType();
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(DAG.getEntryNode())
.setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
getValue(GuardCheckFn), std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
// If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
// Otherwise, emit a volatile load to retrieve the stack guard value.
SDValue Chain = DAG.getEntryNode();
if (TLI.useLoadStackGuardNode()) {
Guard = getLoadStackGuard(DAG, dl, Chain);
} else {
const Value *IRGuard = TLI.getSDagStackGuard(M);
SDValue GuardPtr = getValue(IRGuard);
Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
MachinePointerInfo(IRGuard, 0), Align,
MachineMemOperand::MOVolatile);
}
// Perform the comparison via a setcc node.
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
Guard.getValueType()),
Guard, GuardVal, ISD::SETNE);
// If the guard and stack slot values are not equal, branch to the failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
MVT::Other, BrCond,
DAG.getBasicBlock(SPD.getSuccessMBB()));
DAG.setRoot(Br);
}
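// In sketch form (a summary of this function, not additional emitted code),
// the inline (non-libcall) instrumentation built above is:
// ParentBB:
// guard = <stack guard value>
// slotval = volatile load <stack protector slot>
// brcond (setne guard, slotval), FailureMBB
// br SuccessMBB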
/// Codegen the failure basic block for a stack protector check.
///
/// A failure stack protector machine basic block consists simply of a call to
/// __stack_chk_fail().
///
/// For a high level explanation of how this fits into the stack protector
/// generation see the comment on the declaration of class
/// StackProtectorDescriptor.
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, CallOptions, getCurSDLoc()).second;
// On PS4/PS5, the "return address" must still be within the calling
// function, even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
if (TM.getTargetTriple().isPS())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
// WebAssembly needs an unreachable instruction after a non-returning call,
// because the function return type can be different from __stack_chk_fail's
// return type (void).
if (TM.getTargetTriple().isWasm())
Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
DAG.setRoot(Chain);
}
/// visitBitTestHeader - This function emits the code needed to produce a value
/// suitable for "bit tests".
void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
// Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
SDValue RangeSub =
DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
} else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case ranges are encoded into a series of masks.
// Just use the pointer type; it's guaranteed to fit.
UsePtrType = true;
break;
}
}
SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
}
B.RegVT = VT.getSimpleVT();
B.Reg = FuncInfo.CreateReg(B.RegVT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
RangeSub.getValueType()),
RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
ISD::SETUGT);
Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
DAG.getBasicBlock(B.Default));
}
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
DAG.setRoot(Root);
}
/// visitBitTestCase - This function produces one "bit test".
void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
MachineBasicBlock* NextMBB,
BranchProbability BranchProbToNext,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
MVT VT = BB.RegVT;
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
SDValue Cmp;
unsigned PopCount = countPopulation(B.Mask);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
DAG.getConstant(1, dl, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, dl,
VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
}
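// Taken together, the general case above tests "((1 << x) & Mask) != 0".
// Hypothetical example: Mask = 0b100101 covers the case values {0, 2, 5},
// so three switch cases share one shift, one AND and one compare instead
// of three separate compares.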
// The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
// The branch probability from SwitchBB to NextMBB is BranchProbToNext.
addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
// It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
// one as they are relative probabilities (and thus work more like weights),
// and hence we need to normalize them to let the sum of them become one.
SwitchBB->normalizeSuccProbs();
SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
MVT::Other, getControlRoot(),
Cmp, DAG.getBasicBlock(B.TargetBB));
// Avoid emitting unnecessary branches to the next block.
if (NextMBB != NextBlock(SwitchBB))
BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
DAG.getBasicBlock(NextMBB));
DAG.setRoot(BrAnd);
}
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
// Retrieve successors. Look through artificial IR level blocks like
// catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_gc_transition,
LLVMContext::OB_gc_live, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower invokes with arbitrary operand bundles yet!");
const Value *Callee(I.getCalledOperand());
const Function *Fn = dyn_cast<Function>(Callee);
if (isa<InlineAsm>(Callee))
visitInlineAsm(I, EHPadBB);
else if (Fn && Fn->isIntrinsic()) {
switch (Fn->getIntrinsicID()) {
default:
llvm_unreachable("Cannot invoke this intrinsic");
case Intrinsic::donothing:
// Ignore invokes to @llvm.donothing: jump directly to the next BB.
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I, EHPadBB);
break;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB);
break;
case Intrinsic::wasm_rethrow: {
// This is usually done in visitTargetIntrinsic, but this intrinsic is
// special because it can be invoked, so we manually lower it to a DAG
// node here.
SmallVector<SDValue, 8> Ops;
Ops.push_back(getRoot()); // inchain
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Ops.push_back(
DAG.getTargetConstant(Intrinsic::wasm_rethrow, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
break;
}
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
// Eventually we will support lowering the @llvm.experimental.deoptimize
// intrinsic, and right now there are no plans to support other intrinsics
// with deopt state.
LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
} else {
LowerCallTo(I, getValue(Callee), false, false, EHPadBB);
}
// If the value of the invoke is used outside of its defining block, make it
// available as a virtual register.
// We already took care of the exported value for the statepoint instruction
// during the call to LowerStatepoint.
if (!isa<GCStatepointInst>(I)) {
CopyToExportRegsIfNeeded(&I);
}
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
BranchProbability EHPadBBProb =
BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
: BranchProbability::getZero();
findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
// Update successor info.
addSuccessorWithProb(InvokeMBB, Return);
for (auto &UnwindDest : UnwindDests) {
UnwindDest.first->setIsEHPad();
addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
}
InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower callbrs with arbitrary operand bundles yet!");
assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr");
visitInlineAsm(I);
CopyToExportRegsIfNeeded(&I);
// Retrieve successors.
SmallPtrSet<BasicBlock *, 8> Dests;
Dests.insert(I.getDefaultDest());
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
// Update successor info.
addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
BasicBlock *Dest = I.getIndirectDest(i);
MachineBasicBlock *Target = FuncInfo.MBBMap[Dest];
Target->setIsInlineAsmBrIndirectTarget();
Target->setHasAddressTaken();
// Don't add duplicate machine successors.
if (Dests.insert(Dest).second)
addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
}
CallBrMBB->normalizeSuccProbs();
// Drop into default successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
}
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
assert(FuncInfo.MBB->isEHPad() &&
"Call to landingpad not in landing pad!");
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
return;
// If the landingpad's return type is a token type, we don't create DAG nodes
// for its exception pointer and selector value. The extraction of exception
// pointer or selector value from token type landingpads is not currently
// supported.
if (LP.getType()->isTokenTy())
return;
SmallVector<EVT, 2> ValueVTs;
SDLoc dl = getCurSDLoc();
ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
// Get the two live-in registers as SDValues. The physregs have already been
// copied into virtual registers.
SDValue Ops[2];
if (FuncInfo.ExceptionPointerVirtReg) {
Ops[0] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionPointerVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[0]);
} else {
Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
}
Ops[1] = DAG.getZExtOrTrunc(
DAG.getCopyFromReg(DAG.getEntryNode(), dl,
FuncInfo.ExceptionSelectorVirtReg,
TLI.getPointerTy(DAG.getDataLayout())),
dl, ValueVTs[1]);
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Ops);
setValue(&LP, Res);
}
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
for (JumpTableBlock &JTB : SL->JTCases)
if (JTB.first.HeaderBB == First)
JTB.first.HeaderBB = Last;
// Update BitTestCases.
for (BitTestBlock &BTB : SL->BitTestCases)
if (BTB.Parent == First)
BTB.Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
SmallSet<BasicBlock*, 32> Done;
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
BasicBlock *BB = I.getSuccessor(i);
bool Inserted = Done.insert(BB).second;
if (!Inserted)
continue;
MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
addSuccessorWithProb(IndirectBrMBB, Succ);
}
IndirectBrMBB->normalizeSuccProbs();
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
MVT::Other, getControlRoot(),
getValue(I.getAddress())));
}
void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
if (!DAG.getTarget().Options.TrapUnreachable)
return;
// We may be able to ignore an unreachable that follows a noreturn call.
if (DAG.getTarget().Options.NoTrapAfterNoreturn) {
const BasicBlock &BB = *I.getParent();
if (&I != &BB.front()) {
BasicBlock::const_iterator PredI =
std::prev(BasicBlock::const_iterator(&I));
if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
if (Call->doesNotReturn())
return;
}
}
}
DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
}
void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
SDValue Op = getValue(I.getOperand(0));
SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
Op, Flags);
setValue(&I, UnNodeValue);
}
void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
SDNodeFlags Flags;
if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
}
if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
Flags.setExact(ExactOp->isExact());
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
setValue(&I, BinNodeValue);
}
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
Op1.getValueType(), DAG.getDataLayout());
// Coerce the shift amount to the right type if we can. This exposes the
// truncate or zext to optimization early.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
assert(ShiftTy.getSizeInBits() >= Log2_32_Ceil(Op1.getValueSizeInBits()) &&
"Unexpected shift type");
Op2 = DAG.getZExtOrTrunc(Op2, getCurSDLoc(), ShiftTy);
}
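// Hypothetical example: for "shl i64 %x, i64 %n" on a target whose preferred
// shift-amount type is i32, %n is truncated to i32 here, exposing the
// truncate to DAG combines rather than introducing it during legalization.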
bool nuw = false;
bool nsw = false;
bool exact = false;
if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
if (const OverflowingBinaryOperator *OFBinOp =
dyn_cast<const OverflowingBinaryOperator>(&I)) {
nuw = OFBinOp->hasNoUnsignedWrap();
nsw = OFBinOp->hasNoSignedWrap();
}
if (const PossiblyExactOperator *ExactOp =
dyn_cast<const PossiblyExactOperator>(&I))
exact = ExactOp->isExact();
}
SDNodeFlags Flags;
Flags.setExact(exact);
Flags.setNoSignedWrap(nsw);
Flags.setNoUnsignedWrap(nuw);
SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
Flags);
setValue(&I, Res);
}
void SelectionDAGBuilder::visitSDiv(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
SDNodeFlags Flags;
Flags.setExact(isa<PossiblyExactOperator>(&I) &&
cast<PossiblyExactOperator>(&I)->isExact());
setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
Op2, Flags));
}
void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
predicate = IC->getPredicate();
else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
predicate = ICmpInst::Predicate(IC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
auto &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
// If a pointer's DAG type is larger than its memory type then the DAG values
// are zero-extended. This breaks signed comparisons so truncate back to the
// underlying type before doing the compare.
if (Op1.getValueType() != MemVT) {
Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
}
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
}
void SelectionDAGBuilder::visitFCmp(const User &I) {
FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
predicate = FC->getPredicate();
else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
predicate = FCmpInst::Predicate(FC->getPredicate());
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
auto *FPMO = cast<FPMathOperator>(&I);
if (FPMO->hasNoNaNs() || TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
SDNodeFlags Flags;
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
}
// Check if every user of the select's condition is itself a select; this holds
// when the condition has a single use or is shared only by other selects.
static bool hasOnlySelectUsers(const Value *Cond) {
return llvm::all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
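// Illustrative IR (not from the source) for the min/max matching performed
// in visitSelect below:
// %c = icmp slt i32 %a, %b
// %m = select i1 %c, i32 %a, i32 %b
// matchSelectPattern recognizes this as SPF_SMIN, so the select can be
// emitted as a single ISD::SMIN when that operation is legal or custom.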
void SelectionDAGBuilder::visitSelect(const User &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Cond = getValue(I.getOperand(0));
SDValue LHSVal = getValue(I.getOperand(1));
SDValue RHSVal = getValue(I.getOperand(2));
SmallVector<SDValue, 1> BaseOps(1, Cond);
ISD::NodeType OpCode =
Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT;
bool IsUnaryAbs = false;
bool Negate = false;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
LLVMContext &Ctx = *DAG.getContext();
auto &TLI = DAG.getTargetLoweringInfo();
// We care about the legality of the operation after it has been type
// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
// vselect. Otherwise, if this is going to be scalarized, we want to see if
// min/max is legal on the scalar type.
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
switch (SPR.Flavor) {
case SPF_UMAX: Opc = ISD::UMAX; break;
case SPF_UMIN: Opc = ISD::UMIN; break;
case SPF_SMAX: Opc = ISD::SMAX; break;
case SPF_SMIN: Opc = ISD::SMIN; break;
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
case SPNB_RETURNS_ANY: {
if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
Opc = ISD::FMINNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
Opc = ISD::FMINIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
ISD::FMINNUM : ISD::FMINIMUM;
break;
}
}
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
Opc = ISD::FMAXNUM;
else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
Opc = ISD::FMAXIMUM;
else if (UseScalarMinMax)
Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
case SPF_NABS:
Negate = true;
LLVM_FALLTHROUGH;
case SPF_ABS:
IsUnaryAbs = true;
Opc = ISD::ABS;
break;
default: break;
}
if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
(TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
// If the underlying comparison instruction is used by any other instruction,
// it will not be removed by the conversion, so it is not profitable to
// convert to a min/max.
hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
OpCode = Opc;
LHSVal = getValue(LHS);
RHSVal = getValue(RHS);
BaseOps.clear();
}
if (IsUnaryAbs) {
OpCode = Opc;
LHSVal = getValue(LHS);
BaseOps.clear();
}
}
if (IsUnaryAbs) {
for (unsigned i = 0; i != NumValues; ++i) {
SDLoc dl = getCurSDLoc();
EVT VT = LHSVal.getNode()->getValueType(LHSVal.getResNo() + i);
Values[i] =
DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i));
if (Negate)
Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
Values[i]);
}
} else {
for (unsigned i = 0; i != NumValues; ++i) {
SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
Values[i] = DAG.getNode(
OpCode, getCurSDLoc(),
LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops, Flags);
}
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitTrunc(const User &I) {
// TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitZExt(const User &I) {
// ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// ZExt also can't be a cast to bool for the same reason, so nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSExt(const User &I) {
// SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
// SExt also can't be a cast to bool for the same reason, so nothing much to do.
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPTrunc(const User &I) {
// FPTrunc is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
DAG.getTargetConstant(
0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
}
void SelectionDAGBuilder::visitFPExt(const User &I) {
// FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToUI(const User &I) {
// FPToUI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitFPToSI(const User &I) {
// FPToSI is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitUIToFP(const User &I) {
// UIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitSIToFP(const User &I) {
// SIToFP is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
}
void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
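// Hypothetical example: on a 32-bit target, "ptrtoint i8* %p to i64"
// zero-extends, "ptrtoint i8* %p to i16" truncates, and "ptrtoint i8* %p to
// i32" is a no-op.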
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
EVT PtrMemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
SDValue N = getValue(I.getOperand(0));
SDLoc dl = getCurSDLoc();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
// BitCast assures us that source and destination are the same size, so this is
// either a BITCAST or a no-op.
if (DestVT != N.getValueType())
setValue(&I, DAG.getNode(ISD::BITCAST, dl,
DestVT, N)); // convert types.
// Check if the original LLVM IR Operand was a ConstantInt, because getValue()
// might fold any kind of constant expression to an integer constant and that
// is not what we are looking for. Only recognize a bitcast of a genuine
// constant integer as an opaque constant.
else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
/*isOpaque*/true));
else
setValue(&I, N); // noop cast.
}
void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
SDValue N = getValue(SV);
EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
unsigned SrcAS = SV->getType()->getPointerAddressSpace();
unsigned DestAS = I.getType()->getPointerAddressSpace();
if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS))
N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
setValue(&I, N);
}
void SelectionDAGBuilder::visitInsertElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InVal = getValue(I.getOperand(1));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InVal, InIdx));
}
void SelectionDAGBuilder::visitExtractElement(const User &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue InVec = getValue(I.getOperand(0));
SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
TLI.getVectorIdxTy(DAG.getDataLayout()));
setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
TLI.getValueType(DAG.getDataLayout(), I.getType()),
InVec, InIdx));
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
ArrayRef<int> Mask;
if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I))
Mask = SVI->getShuffleMask();
else
Mask = cast<ConstantExpr>(I).getShuffleMask();
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
if (all_of(Mask, [](int Elem) { return Elem == 0; }) &&
VT.isScalableVector()) {
// Canonical splat form of first element of first input vector.
SDValue FirstElt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
return;
}
// For now, we only handle splats for scalable vectors.
// The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
// for targets that support a SPLAT_VECTOR for non-scalable vector types.
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
unsigned SrcNumElts = SrcVT.getVectorNumElements();
unsigned MaskNumElts = Mask.size();
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
}
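// Illustrative example (not from the source): shuffling two <4 x i32>
// sources with mask <0,1,2,3,4,5,6,7> is recognized below as a single
// CONCAT_VECTORS of Src1 and Src2, and mask <4,5,6,7,0,1,2,3> likewise
// becomes a concatenation with the two sources swapped.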
// Normalize the shuffle vector since mask and vector length don't match.
if (SrcNumElts < MaskNumElts) {
// Mask is longer than the source vectors. We can use CONCAT_VECTORS to
// make the mask and vector lengths match.
if (MaskNumElts % SrcNumElts == 0) {
// Mask length is a multiple of the source vector length.
// Check if the shuffle is some kind of concatenation of the input
// vectors.
unsigned NumConcat = MaskNumElts / SrcNumElts;
bool IsConcat = true;
SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx < 0)
continue;
// Ensure the indices in each SrcVT sized piece are sequential and that
// the same source is used for the whole piece.
if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
(ConcatSrcs[i / SrcNumElts] >= 0 &&
ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
IsConcat = false;
break;
}
// Remember which source this index came from.
ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
// The shuffle is concatenating multiple vectors together. Just emit
// a CONCAT_VECTORS operation.
if (IsConcat) {
SmallVector<SDValue, 8> ConcatOps;
for (auto Src : ConcatSrcs) {
if (Src < 0)
ConcatOps.push_back(DAG.getUNDEF(SrcVT));
else if (Src == 0)
ConcatOps.push_back(Src1);
else
ConcatOps.push_back(Src2);
}
setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
return;
}
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
PaddedMaskNumElts);
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
MOps1[0] = Src1;
MOps2[0] = Src2;
Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts - PaddedMaskNumElts;
MappedOps[i] = Idx;
}
SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
// If the concatenated vector was padded, extract a subvector with the
// correct number of elements.
if (MaskNumElts != PaddedMaskNumElts)
Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
DAG.getVectorIdxConstant(0, DL));
setValue(&I, Result);
return;
}
if (SrcNumElts > MaskNumElts) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle.
int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
bool CanExtract = true;
for (int Idx : Mask) {
unsigned Input = 0;
if (Idx < 0)
continue;
if (Idx >= (int)SrcNumElts) {
Input = 1;
Idx -= SrcNumElts;
}
// If all the indices come from the same MaskNumElts sized portion of
// the sources we can use extract. Also make sure the extract wouldn't
// extract past the end of the source.
int NewStartIdx = alignDown(Idx, MaskNumElts);
if (NewStartIdx + MaskNumElts > SrcNumElts ||
(StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
CanExtract = false;
// Make sure we always update StartIdx as we use it to track if all
// elements are undef.
StartIdx[Input] = NewStartIdx;
}
if (StartIdx[0] < 0 && StartIdx[1] < 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
if (CanExtract) {
// Extract appropriate subvector and generate a vector shuffle
for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (StartIdx[Input] < 0)
Src = DAG.getUNDEF(VT);
else {
Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
DAG.getVectorIdxConstant(StartIdx[Input], DL));
}
}
// Calculate new mask.
SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
for (int &Idx : MappedOps) {
if (Idx >= (int)SrcNumElts)
Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
else if (Idx >= 0)
Idx -= StartIdx[0];
}
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}
}
// We can't use either concat vectors or extract subvectors, so fall back to
// replacing the shuffle with extract-element and build-vector.
EVT EltVT = VT.getVectorElementType();
SmallVector<SDValue,8> Ops;
for (int Idx : Mask) {
SDValue Res;
if (Idx < 0) {
Res = DAG.getUNDEF(EltVT);
} else {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src,
DAG.getVectorIdxConstant(Idx, DL));
}
Ops.push_back(Res);
}
setValue(&I, DAG.getBuildVector(VT, DL, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
const Value *Op1 = I.getOperand(1);
Type *AggTy = I.getType();
Type *ValTy = Op1->getType();
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumAggValues = AggValueVTs.size();
unsigned NumValValues = ValValueVTs.size();
SmallVector<SDValue, 4> Values(NumAggValues);
// Ignore an insertvalue that produces an empty object
if (!NumAggValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SDValue Agg = getValue(Op0);
unsigned i = 0;
// Copy the beginning value(s) from the original aggregate.
for (; i != LinearIndex; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
// Copy values from the inserted value(s).
if (NumValValues) {
SDValue Val = getValue(Op1);
for (; i != LinearIndex + NumValValues; ++i)
Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
}
// Copy remaining value(s) from the original aggregate.
for (; i != NumAggValues; ++i)
Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(AggValueVTs), Values));
}
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
ArrayRef<unsigned> Indices = I.getIndices();
const Value *Op0 = I.getOperand(0);
Type *AggTy = Op0->getType();
Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
unsigned NumValValues = ValValueVTs.size();
// Ignore an extractvalue that produces an empty object
if (!NumValValues) {
setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
return;
}
SmallVector<SDValue, 4> Values(NumValValues);
SDValue Agg = getValue(Op0);
// Copy out the selected value(s).
for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
Values[i - LinearIndex] =
OutOfUndef ?
DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValValueVTs), Values));
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
Value *Op0 = I.getOperand(0);
// Note that the pointer operand may be a vector of pointers, in which case we
// take the address space from its scalar element type.
unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
auto &TLI = DAG.getTargetLoweringInfo();
// Normalize Vector GEP - all scalar operands should be converted to
// splat vectors.
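// For example, in
//   %gep = getelementptr i32, i32* %p, <4 x i32> %idx
// the scalar base %p is splatted to a <4 x i32*> value so that all operands
// of the address arithmetic have matching vector types.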
bool IsVectorGEP = I.getType()->isVectorTy();
ElementCount VectorElementCount =
IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount()
: ElementCount::getFixed(0);
if (IsVectorGEP && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
if (VectorElementCount.isScalable())
N = DAG.getSplatVector(VT, dl, N);
else
N = DAG.getSplatBuildVector(VT, dl, N);
}
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
if (StructType *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
uint64_t Offset =
DAG.getDataLayout().getStructLayout(StTy)->getElementOffset(Field);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
DAG.getConstant(Offset, dl, N.getValueType()), Flags);
}
} else {
// IdxSize is the width of the arithmetic according to IR semantics.
// In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
// (and fix up the result later).
unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
MVT IdxTy = MVT::getIntegerVT(IdxSize);
TypeSize ElementSize =
DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType());
// We intentionally mask away the high bits here; ElementSize may not
// fit in IdxTy.
APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
bool ElementScalable = ElementSize.isScalable();
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
const auto *C = dyn_cast<Constant>(Idx);
if (C && isa<VectorType>(C->getType()))
C = C->getSplatValue();
const auto *CI = dyn_cast_or_null<ConstantInt>(C);
if (CI && CI->isZero())
continue;
if (CI && !ElementScalable) {
APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
LLVMContext &Context = *DAG.getContext();
SDValue OffsVal;
if (IsVectorGEP)
OffsVal = DAG.getConstant(
Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
else
OffsVal = DAG.getConstant(Offs, dl, IdxTy);
// In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
// N = N + Idx * ElementMul;
SDValue IdxN = getValue(Idx);
if (!IdxN.getValueType().isVector() && IsVectorGEP) {
EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
VectorElementCount);
if (VectorElementCount.isScalable())
IdxN = DAG.getSplatVector(VT, dl, IdxN);
else
IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
if (ElementScalable) {
EVT VScaleTy = N.getValueType().getScalarType();
SDValue VScale = DAG.getNode(
ISD::VSCALE, dl, VScaleTy,
DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
if (IsVectorGEP)
VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
} else {
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
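// For example, indexing an array of i32 scales the index by
// ElementMul == 4, which is emitted as 'shl Idx, 2' rather than a multiply.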
if (ElementMul != 1) {
if (ElementMul.isPowerOf2()) {
unsigned Amt = ElementMul.logBase2();
IdxN = DAG.getNode(ISD::SHL, dl,
N.getValueType(), IdxN,
DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
}
}
N = DAG.getNode(ISD::ADD, dl,
N.getValueType(), N, IdxN);
}
}
MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
if (IsVectorGEP) {
PtrTy = MVT::getVectorVT(PtrTy, VectorElementCount);
PtrMemTy = MVT::getVectorVT(PtrMemTy, VectorElementCount);
}
if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
setValue(&I, N);
}
void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// If this is a fixed-size alloca in the entry block of the function,
// allocate it statically on the stack.
if (FuncInfo.StaticAllocaMap.count(&I))
return; // getValue will auto-populate this.
SDLoc dl = getCurSDLoc();
Type *Ty = I.getAllocatedType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
TypeSize TySize = DL.getTypeAllocSize(Ty);
MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign());
SDValue AllocSize = getValue(I.getArraySize());
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
if (TySize.isScalable())
AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
DAG.getVScale(dl, IntPtr,
APInt(IntPtr.getScalarSizeInBits(),
TySize.getKnownMinValue())));
else
AllocSize =
DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize,
DAG.getConstant(TySize.getFixedValue(), dl, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If it is greater than the stack
// alignment, we note this in the DYNAMIC_STACKALLOC node.
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign();
if (*Alignment <= StackAlign)
Alignment = None;
const uint64_t StackAlignMask = StackAlign.value() - 1U;
// Round the size of the allocation up to the stack alignment size
// by adding SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
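// For example, with a 16-byte stack alignment, a 20-byte allocation is
// rounded as (20 + 15) & ~15 == 32.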
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(StackAlignMask, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
DAG.getConstant(~StackAlignMask, dl, IntPtr));
SDValue Ops[] = {
getRoot(), AllocSize,
DAG.getConstant(Alignment ? Alignment->value() : 0, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
if (I.isAtomic())
return visitAtomicLoad(I);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Value *SV = I.getOperand(0);
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
if (Arg->hasSwiftErrorAttr())
return visitLoadFromSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
if (Alloca->isSwiftError())
return visitLoadFromSwiftError(I);
}
}
SDValue Ptr = getValue(SV);
Type *Ty = I.getType();
Align Alignment = I.getAlign();
AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
bool isVolatile = I.isVolatile();
MachineMemOperand::Flags MMOFlags =
TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
SDValue Root;
bool ConstantMemory = false;
if (isVolatile)
// Serialize volatile loads with other side effects.
Root = getRoot();
else if (NumValues > MaxParallelChains)
Root = getMemoryRoot();
else if (AA &&
AA->pointsToConstantMemory(MemoryLocation(
SV,
LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
AAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
MMOFlags |= MachineMemOperand::MOInvariant;
// FIXME: pointsToConstantMemory probably does not imply dereferenceable,
// but the previous usage implied it did. Probably should check
// isDereferenceableAndAlignedPointer.
MMOFlags |= MachineMemOperand::MODereferenceable;
} else {
// Do not serialize non-volatile loads against each other.
Root = DAG.getRoot();
}
SDLoc dl = getCurSDLoc();
if (isVolatile)
Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
// An aggregate load cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
EVT PtrVT = Ptr.getValueType();
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// Serializing loads here may result in excessive register pressure, and
// TokenFactor places arbitrary choke points on the scheduler. SD scheduling
// could recover a bit by hoisting nodes upward in the chain by recognizing
// they are side-effect free or do not alias. The optimizer should really
// avoid this case by converting large object/array copies to llvm.memcpy
// (MaxParallelChains should always remain as a failsafe).
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue A = DAG.getNode(ISD::ADD, dl,
PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT),
Flags);
SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
MMOFlags, AAInfo, Ranges);
Chains[ChainI] = L.getValue(1);
if (MemVTs[i] != ValueVTs[i])
L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
PendingLoads.push_back(Chain);
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitStoreToSwiftError when backend supports swifterror");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
assert(DAG.getTargetLoweringInfo().supportSwiftError() &&
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
!I.hasMetadata(LLVMContext::MD_nontemporal) &&
!I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
I.getAAMetadata()))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
ValueVTs, &Offsets);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
if (I.isAtomic())
return visitAtomicStore(I);
const Value *SrcV = I.getOperand(0);
const Value *PtrV = I.getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.supportSwiftError()) {
// Swifterror values can come from either a function parameter with
// swifterror attribute or an alloca with swifterror attribute.
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
if (Arg->hasSwiftErrorAttr())
return visitStoreToSwiftError(I);
}
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
if (Alloca->isSwiftError())
return visitStoreToSwiftError(I);
}
}
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
// Get the lowered operands. Note that we do this after
// checking if NumValues is zero, because with zero values
// the operands won't have values in the map.
SDValue Src = getValue(SrcV);
SDValue Ptr = getValue(PtrV);
SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot();
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
AAMDNodes AAInfo = I.getAAMetadata();
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
// An aggregate store cannot wrap around the address space, so offsets to its
// parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
unsigned ChainI = 0;
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
SDValue Add =
DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags);
SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
SDValue St =
DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
makeArrayRef(Chains.data(), ChainI));
DAG.setRoot(StoreNode);
}
void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.store.*(Src0, Ptr, alignment, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue();
Mask = I.getArgOperand(3);
};
auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// llvm.masked.compressstore.*(Src0, Ptr, Mask)
Src0 = I.getArgOperand(0);
Ptr = I.getArgOperand(1);
Mask = I.getArgOperand(2);
Alignment = None;
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsCompressing)
getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
// Get a uniform base for the Gather/Scatter intrinsic.
// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
// We try to represent it as a base pointer + vector of indices.
// Usually, the vector of pointers comes from a 'getelementptr' instruction.
// The first operand of the GEP may be a single pointer or a vector of pointers.
// Example:
// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
// or
// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
//
// When the first GEP operand is a single pointer, it is the uniform base we
// are looking for. If the first operand of the GEP is a splat vector, we
// extract the splat value and use it as the uniform base.
// In all other cases the function returns 'false'.
static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
ISD::MemIndexType &IndexType, SDValue &Scale,
SelectionDAGBuilder *SDB, const BasicBlock *CurBB,
uint64_t ElemSize) {
SelectionDAG& DAG = SDB->DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
C = C->getSplatValue();
if (!C)
return false;
Base = SDB->getValue(C);
ElementCount NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts);
Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
if (!GEP || GEP->getParent() != CurBB)
return false;
if (GEP->getNumOperands() != 2)
return false;
const Value *BasePtr = GEP->getPointerOperand();
const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1);
// Make sure the base is scalar and the index is a vector.
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
Base = SDB->getValue(BasePtr);
Index = SDB->getValue(IndexVal);
IndexType = ISD::SIGNED_SCALED;
// MGATHER/MSCATTER are only required to support scaling by one or by the
// element size. Other scales may be produced using target-specific DAG
// combines.
uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
if (ScaleVal != ElemSize && ScaleVal != 1)
return false;
Scale =
DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL));
return true;
}
void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
const Value *Ptr = I.getArgOperand(1);
SDValue Src0 = getValue(I.getArgOperand(0));
SDValue Mask = getValue(I.getArgOperand(3));
EVT VT = Src0.getValueType();
Align Alignment = cast<ConstantInt>(I.getArgOperand(2))
->getMaybeAlignValue()
.value_or(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { getMemoryRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
Ops, MMO, IndexType, false);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
Mask = I.getArgOperand(2);
Src0 = I.getArgOperand(3);
};
auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0,
MaybeAlign &Alignment) {
// @llvm.masked.expandload.*(Ptr, Mask, Src0)
Ptr = I.getArgOperand(0);
Alignment = None;
Mask = I.getArgOperand(1);
Src0 = I.getArgOperand(2);
};
Value *PtrOperand, *MaskOperand, *Src0Operand;
MaybeAlign Alignment;
if (IsExpanding)
getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
else
getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
SDValue Ptr = getValue(PtrOperand);
SDValue Src0 = getValue(Src0Operand);
SDValue Mask = getValue(MaskOperand);
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Load =
DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO,
ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain)
PendingLoads.push_back(Load.getValue(1));
setValue(&I, Load);
}
void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDLoc sdl = getCurSDLoc();
// @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
const Value *Ptr = I.getArgOperand(0);
SDValue Src0 = getValue(I.getArgOperand(3));
SDValue Mask = getValue(I.getArgOperand(2));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Align Alignment = cast<ConstantInt>(I.getArgOperand(1))
->getMaybeAlignValue()
.value_or(DAG.getEVTAlign(VT.getScalarType()));
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
SDValue Scale;
bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this,
I.getParent(), VT.getScalarStoreSize());
unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, sdl, NewIdxVT, Index);
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
Ops, MMO, IndexType, ISD::NON_EXTLOAD);
PendingLoads.push_back(Gather.getValue(1));
setValue(&I, Gather);
}
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
AtomicOrdering FailureOrdering = I.getFailureOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering,
FailureOrdering);
SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
dl, MemVT, VTs, InChain,
getValue(I.getPointerOperand()),
getValue(I.getCompareOperand()),
getValue(I.getNewValOperand()), MMO);
SDValue OutChain = L.getValue(2);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
SDLoc dl = getCurSDLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break;
case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break;
}
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering);
SDValue L =
DAG.getAtomic(NT, dl, MemVT, InChain,
getValue(I.getPointerOperand()), getValue(I.getValOperand()),
MMO);
SDValue OutChain = L.getValue(1);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitFence(const FenceInst &I) {
SDLoc dl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = DAG.getTargetConstant((unsigned)I.getOrdering(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl,
TLI.getFenceOperandTy(DAG.getDataLayout()));
DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Order = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for loads to prevent future divergence.
SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
SDValue OutChain = L.getValue(1);
if (!I.isUnordered())
DAG.setRoot(OutChain);
else
PendingLoads.push_back(OutChain);
return;
}
SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
DAG.setRoot(OutChain);
}
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT MemVT =
TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
if (I.getAlign().value() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(),
I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering);
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
SDValue Ptr = getValue(I.getPointerOperand());
if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
// TODO: Once this is better exercised by tests, it should be merged with
// the normal path for stores to prevent future divergence.
SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
DAG.setRoot(S);
return;
}
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
Ptr, Val, MMO);
DAG.setRoot(OutChain);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
// Ignore the callsite's attributes. A specific call site may be marked with
// readnone, but the lowering code will expect the chain based on the
// definition.
const Function *F = I.getCalledFunction();
bool HasChain = !F->doesNotAccessMemory();
bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
if (HasChain) { // If this intrinsic has side-effects, chainify it.
if (OnlyLoad) {
// We don't need to serialize loads against other loads.
Ops.push_back(DAG.getRoot());
} else {
Ops.push_back(getRoot());
}
}
// Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
DAG.getMachineFunction(),
Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
continue;
}
// Use TargetConstant instead of a regular constant for immarg.
EVT VT = TLI.getValueType(DAG.getDataLayout(), Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
} else {
Ops.push_back(
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
}
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
if (HasChain)
ValueVTs.push_back(MVT::Other);
SDVTList VTs = DAG.getVTList(ValueVTs);
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPMO);
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
// Create the node.
SDValue Result;
if (IsTgtIntrinsic) {
// This is a target intrinsic that touches memory.
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.flags, Info.size,
I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
}
if (HasChain) {
SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
if (OnlyLoad)
PendingLoads.push_back(Chain);
else
DAG.setRoot(Chain);
}
if (!I.getType()->isVoidTy()) {
if (!isa<VectorType>(I.getType()))
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
if (!Alignment)
Alignment = F->getAttributes().getRetAlignment();
// Insert `assertalign` node if there's an alignment.
if (InsertAssertAlign && Alignment) {
Result =
DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne());
}
setValue(&I, Result);
}
}
/// GetSignificand - Get the significand and build it into a floating-point
/// number with exponent of 1:
///
/// Op = (Op & 0x007fffff) | 0x3f800000;
///
/// where Op is the i32 bit representation of the floating-point value.
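///
/// For example, for Op == 0x40490fdb (3.14159274f) this produces
/// 0x3fc90fdb (1.57079637f): the stored significand with the exponent
/// forced to 0 (biased 127).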
static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, dl, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
///
/// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
///
/// where Op is the i32 bit representation of the floating-point value.
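///
/// For example, for Op == 0x41000000 (8.0f) this computes
/// ((0x41000000 & 0x7f800000) >> 23) - 127 == 130 - 127, giving 3.0f.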
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
const TargetLowering &TLI, const SDLoc &dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, dl, MVT::i32));
SDValue t1 = DAG.getNode(
ISD::SRL, dl, MVT::i32, t0,
DAG.getConstant(23, dl,
TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout())));
SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
DAG.getConstant(127, dl, MVT::i32));
return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
}
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
const SDLoc &dl) {
return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
MVT::f32);
}
static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
SelectionDAG &DAG) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
// IntegerPartOfX = (int32_t)t0;
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
// FractionalPartOfX = t0 - (float)IntegerPartOfX;
SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
// IntegerPartOfX <<= 23;
IntegerPartOfX =
DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
DAG.getConstant(23, dl,
DAG.getTargetLoweringInfo().getShiftAmountTy(
MVT::i32, DAG.getDataLayout())));
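// Shifting IntegerPartOfX into the exponent field means the integer add at
// the end of this function effectively multiplies the polynomial result by
// 2^IntegerPartOfX: adding k to the biased exponent of an IEEE-754 float
// scales it by 2^k.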
SDValue TwoToFractionalPartOfX;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// TwoToFractionalPartOfX =
// 0.997535578f +
// (0.735607626f + 0.252464424f * x) * x;
//
// error 0.0144103317, which is 6 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3e814304, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f3c50c8, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// TwoToFractionalPartOfX =
// 0.999892986f +
// (0.696457318f +
// (0.224338339f + 0.792043434e-1f * x) * x) * x;
//
// error 0.000107046256, which is 13 to 14 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3da235e3, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3e65b8f3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f324b07, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// TwoToFractionalPartOfX =
// 0.999999982f +
// (0.693148872f +
// (0.240227044f +
// (0.554906021e-1f +
// (0.961591928e-2f +
// (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
// error 2.47208000*10^(-7), which is better than 18 bits
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3924b03e, dl));
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3ab24b87, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3c1d8c17, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3d634a1d, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3e75fe14, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
getF32Constant(DAG, 0x3f317234, dl));
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000, dl));
}
// Add the exponent into the result in integer domain.
SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
}
/// expandExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// t0 = Op * log2(e)
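// (using the identity exp(x) == 2^(x * log2(e)))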
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op, Flags);
}
/// expandLog - Lower a log intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent =
DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue LogOfMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// LogOfMantissa =
// -1.1609546f +
// (1.4034025f - 0.23903021f * x) * x;
//
// error 0.0034276066, which is better than 8 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbe74c456, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3fb3a2b1, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f949a29, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// LogOfMantissa =
// -1.7417939f +
// (2.8212026f +
// (-1.4699568f +
// (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
//
// error 0.000061011436, which is 14 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbd67b6d6, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ee4f4b8, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fbc278b, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40348e95, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fdef31a, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// LogOfMantissa =
// -2.1072184f +
// (4.2372794f +
// (-3.7029485f +
// (2.2781945f +
// (-0.87823314f +
// (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
//
// error 0.0000023660568, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbc91e5ac, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e4350aa, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f60d3e3, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x4011cdf0, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x406cfd1c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x408797cb, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4006dcab, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op, Flags);
}
/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
// Different possible minimax approximations of the significand in
// floating-point for various degrees of accuracy over [1,2].
SDValue Log2ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
//
// error 0.0049451742, which is more than 7 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbeb08fe0, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x40019463, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fd6633d, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log2ofMantissa =
// -2.51285454f +
// (4.07009056f +
// (-2.12067489f +
// (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
//
// error 0.0000876136000, which is better than 13 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbda7262e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f25280b, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x4007b923, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40823e2f, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x4020d29c, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log2ofMantissa =
// -3.0400495f +
// (6.1129976f +
// (-5.3420409f +
// (3.2865683f +
// (-1.2669343f +
// (0.27515199f -
// 0.25691327e-1f * x) * x) * x) * x) * x) * x;
//
// error 0.0000018516, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbcd2769e, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e8ce0b9, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3fa22ae7, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x40525723, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
getF32Constant(DAG, 0x40aaf200, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
getF32Constant(DAG, 0x40c39dad, dl));
SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
getF32Constant(DAG, 0x4042902c, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op, Flags);
}
/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3e9a209a, dl));
// Get the significand and build it into a floating-point number with
// exponent of 1.
SDValue X = GetSignificand(DAG, Op1, dl);
SDValue Log10ofMantissa;
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
//
// Log10ofMantissa =
// -0.50419619f +
// (0.60948995f - 0.10380950f * x) * x;
//
// error 0.0014886165, which is 6 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0xbdd49a13, dl));
SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3f1c0789, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f011300, dl));
} else if (LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
// Log10ofMantissa =
// -0.64831180f +
// (0.91751397f +
// (-0.31664806f + 0.47637168e-1f * x) * x) * x;
//
// error 0.00019228036, which is better than 12 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3d431f31, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3ea21fb2, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3f6ae232, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f25f7c3, dl));
} else { // LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
// Log10ofMantissa =
// -0.84299375f +
// (1.5327582f +
// (-1.0688956f +
// (0.49102474f +
// (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
//
// error 0.0000037995730, which is better than 18 bits
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
getF32Constant(DAG, 0x3c5d51ce, dl));
SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
getF32Constant(DAG, 0x3e00685a, dl));
SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
getF32Constant(DAG, 0x3efb6798, dl));
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f88d192, dl));
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3fc4316c, dl));
SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
getF32Constant(DAG, 0x3f57ce70, dl));
}
return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
}
// No special expansion.
return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op, Flags);
}
/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
/// limited-precision mode.
static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI, SDNodeFlags Flags) {
if (Op.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
return getLimitedPrecisionExp2(Op, dl, DAG);
// No special expansion.
return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op, Flags);
}
/// expandPow - Lower a pow intrinsic. Handles the special sequences for
/// limited-precision mode when the base is exactly 10.0f.
static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const TargetLowering &TLI,
SDNodeFlags Flags) {
bool IsExp10 = false;
if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
APFloat Ten(10.0f);
IsExp10 = LHSC->isExactlyValue(Ten);
}
}
// TODO: What fast-math-flags should be set on the FMUL node?
if (IsExp10) {
// Put the exponent in the right bit position for later addition to the
// final result:
//
// #define LOG2OF10 3.3219281f
// t0 = Op * LOG2OF10;
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
getF32Constant(DAG, 0x40549a78, dl));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
// No special expansion.
return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS, Flags);
}
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
// If RHS is a constant, we can expand this out to a multiplication tree if
// it's beneficial on the target, otherwise we end up lowering to a call to
// __powidf2 (for example).
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
unsigned Val = RHSC->getSExtValue();
// powi(x, 0) -> 1.0
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
if (DAG.getTargetLoweringInfo().isBeneficialToExpandPowI(
Val, DAG.shouldOptForSize())) {
// Get the exponent as a positive value.
if ((int)Val < 0)
Val = -Val;
// We use the simple binary decomposition method to generate the multiply
// sequence. There are more optimal ways to do this (for example,
// powi(x,15) generates one more multiply than it should), but this has
// the benefit of being both really simple and much better than a libcall.
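// For example, powi(x, 13): 13 == 0b1101, so the loop multiplies together
// x, x^4, and x^8 (obtained by repeated squaring of x).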
SDValue Res; // Logically starts equal to 1.0
SDValue CurSquare = LHS;
// TODO: Intrinsics should have fast-math-flags that propagate to these
// nodes.
while (Val) {
if (Val & 1) {
if (Res.getNode())
Res =
DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
else
Res = CurSquare; // 1.0*CurSquare.
}
CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
CurSquare, CurSquare);
Val >>= 1;
}
// If the original was negative, invert the result, producing 1/(x*x*x).
if (RHSC->getSExtValue() < 0)
Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
return Res;
}
}
// Otherwise, expand to a libcall.
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
// the way to operation legalization. If we end up there and we do not have
// the ability to widen the type (if VT*2 is not legal), we cannot expand the
// node.
// Coax the legalizer into expanding the node during type legalization instead
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
// We don't have to do this if Scale is 0; that can always be expanded, unless
// it's a saturating signed operation. Those can experience true integer
// division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
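// For example, an i32 sdiv.fix.sat whose operation action is not Legal or
// Custom is emitted below as an i33 node; i33 is illegal, so type
// legalization promotes it and the operation is expanded early as intended.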
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
Opcode, VT, ScaleInt);
if (Action != TargetLowering::Legal && Action != TargetLowering::Custom) {
EVT PromVT;
if (VT.isScalarInteger())
PromVT = EVT::getIntegerVT(Ctx, VT.getSizeInBits() + 1);
else if (VT.isVector()) {
PromVT = VT.getVectorElementType();
PromVT = EVT::getIntegerVT(Ctx, PromVT.getSizeInBits() + 1);
PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
} else
llvm_unreachable("Wrong VT for DIVFIX?");
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
} else {
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
// For saturating operations, we need to shift up the LHS to get the
// proper saturation width, and then shift down again afterwards.
if (Saturating)
LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
if (Saturating)
Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
return DAG.getNode(Opcode, DL, VT, LHS, RHS, Scale);
}
// getUnderlyingArgRegs - Find underlying registers used for a truncated,
// bitcasted, or split argument. Returns a list of <Register, size in bits>.
static void
getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, TypeSize>> &Regs,
const SDValue &N) {
switch (N.getOpcode()) {
case ISD::CopyFromReg: {
SDValue Op = N.getOperand(1);
Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
Op.getValueType().getSizeInBits());
return;
}
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
getUnderlyingArgRegs(Regs, N.getOperand(0));
return;
case ISD::BUILD_PAIR:
case ISD::BUILD_VECTOR:
case ISD::CONCAT_VECTORS:
for (SDValue Op : N->op_values())
getUnderlyingArgRegs(Regs, Op);
return;
default:
return;
}
}
/// If the DbgValueInst is a dbg_value of a function argument, create the
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
/// We don't currently support this for variadic dbg_values, as they shouldn't
/// appear for function arguments or in the prologue.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
DILocation *DL, FuncArgumentDbgValueKind Kind, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Helper to create DBG_INSTR_REFs or DBG_VALUEs, depending on what kind
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
if (Reg.isVirtual() && MF.useDebugInstrRef()) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
MIB.addReg(Reg);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
// We don't have an "Indirect" field in DBG_INSTR_REF, fold that into
// the DIExpression.
if (Indirect)
NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore);
MIB.addMetadata(NewDIExpr);
return MIB;
} else {
// Create a completely standard DBG_VALUE.
auto &Inst = TII->get(TargetOpcode::DBG_VALUE);
return BuildMI(MF, DL, Inst, Indirect, Reg, Variable, FragExpr);
}
};
if (Kind == FuncArgumentDbgValueKind::Value) {
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic is found in
// the entry block.
bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
if (!IsInEntryBlock)
return false;
// ArgDbgValues are hoisted to the beginning of the entry block. So we
// should only emit as ArgDbgValue if the dbg.value intrinsic describes a
// variable that also is a param.
//
    // However, if we are at the top of the entry block already, we can still
// emit using ArgDbgValue. This might catch some situations when the
// dbg.value refers to an argument that isn't used in the entry block, so
// any CopyToReg node would be optimized out and the only way to express
// this DBG_VALUE is by using the physical reg (or FI) as done in this
    // method.
bool VariableIsFunctionInputArg = Variable->isParameter() &&
!DL->getInlinedAt();
bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
if (!IsInPrologue && !VariableIsFunctionInputArg)
return false;
    // Here we assume that a function argument at the IR level can only be
    // used to describe one input parameter at the source level. For example,
    // given source code like this
//
// struct A { long x, y; };
// void foo(struct A a, long b) {
// ...
// b = a.x;
// ...
// }
//
// and IR like this
//
// define void @foo(i32 %a1, i32 %a2, i32 %b) {
// entry:
// call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
// call void @llvm.dbg.value(metadata i32 %b, "b",
// ...
// call void @llvm.dbg.value(metadata i32 %a1, "b"
// ...
//
    // then the last dbg.value is describing a parameter "b" using a value
    // that is an argument. But since we have already used %a1 to describe a
    // parameter, we should not handle that last dbg.value here (that would
    // result in an incorrect hoisting of the DBG_VALUE to the function
    // entry).
    // Notice that we allow one dbg.value per IR-level argument to accommodate
    // the situation with fragments above.
if (VariableIsFunctionInputArg) {
unsigned ArgNo = Arg->getArgNo();
if (ArgNo >= FuncInfo.DescribedArgs.size())
FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
return false;
FuncInfo.DescribedArgs.set(ArgNo);
}
}
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
SmallVector<std::pair<unsigned, TypeSize>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
getUnderlyingArgRegs(ArgRegsAndSizes, N);
Register Reg;
if (ArgRegsAndSizes.size() == 1)
Reg = ArgRegsAndSizes.front().first;
if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
if (Reg) {
Op = MachineOperand::CreateReg(Reg, false);
IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
}
}
if (!Op && N.getNode()) {
// Check if frame index is available.
SDValue LCandidate = peekThroughBitcasts(N);
if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
if (FrameIndexSDNode *FINode =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
Op = MachineOperand::CreateFI(FINode->getIndex());
}
if (!Op) {
    // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg.
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
for (const auto &RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
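        // For example, a 48-bit fragment covered by two 32-bit registers:
        // the second register starts at offset 32 and contributes only its
        // low 48 - 32 = 16 bits.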
int RegFragmentSizeInBits = RegAndSize.second;
if (auto ExprFragmentInfo = Expr->getFragmentInfo()) {
uint64_t ExprFragmentSizeInBits = ExprFragmentInfo->SizeInBits;
// The register is entirely outside the expression fragment,
// so is irrelevant for debug info.
if (Offset >= ExprFragmentSizeInBits)
break;
// The register is partially outside the expression fragment, only
// the low bits within the fragment are relevant for debug info.
if (Offset + RegFragmentSizeInBits > ExprFragmentSizeInBits) {
RegFragmentSizeInBits = ExprFragmentSizeInBits - Offset;
}
}
auto FragmentExpr = DIExpression::createFragmentExpression(
Expr, Offset, RegFragmentSizeInBits);
Offset += RegAndSize.second;
// If a valid fragment expression cannot be created, the variable's
// correct value cannot be determined and so it is set as Undef.
if (!FragmentExpr) {
SDDbgValue *SDV = DAG.getConstantDbgValue(
Variable, Expr, UndefValue::get(V->getType()), DL, SDNodeOrder);
DAG.AddDbgValue(SDV, false);
continue;
}
MachineInstr *NewMI =
MakeVRegDbgValue(RegAndSize.first, *FragmentExpr,
Kind != FuncArgumentDbgValueKind::Value);
FuncInfo.ArgDbgValues.push_back(NewMI);
}
};
// Check if ValueMap has reg number.
DenseMap<const Value *, Register>::const_iterator
VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), None);
if (RFV.occupiesMultipleRegs()) {
splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = Kind != FuncArgumentDbgValueKind::Value;
} else if (ArgRegsAndSizes.size() > 1) {
// This was split due to the calling convention, and no virtual register
// mapping exists for the value.
splitMultiRegDbgValue(ArgRegsAndSizes);
return true;
}
}
if (!Op)
return false;
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
if (Op->isReg())
NewMI = MakeVRegDbgValue(Op->getReg(), Expr, IsIndirect);
else
NewMI = BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), true, *Op,
Variable, Expr);
// Otherwise, use ArgDbgValues.
FuncInfo.ArgDbgValues.push_back(NewMI);
return true;
}
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
DIExpression *Expr,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations.
//
// Consider "int x = 0; int *px = &x;". There are two kinds of interesting
// debug values here after optimization:
//
// dbg.value(i32* %px, !"int *px", !DIExpression()), and
// dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref))
//
// Both describe the direct values of their associated variables.
return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(),
/*IsIndirect*/ false, dl, DbgSDNodeOrder);
}
static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
switch (Intrinsic) {
case Intrinsic::smul_fix:
return ISD::SMULFIX;
case Intrinsic::umul_fix:
return ISD::UMULFIX;
case Intrinsic::smul_fix_sat:
return ISD::SMULFIXSAT;
case Intrinsic::umul_fix_sat:
return ISD::UMULFIXSAT;
case Intrinsic::sdiv_fix:
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
case Intrinsic::sdiv_fix_sat:
return ISD::SDIVFIXSAT;
case Intrinsic::udiv_fix_sat:
return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
}
void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
const char *FunctionName) {
assert(FunctionName && "FunctionName must not be nullptr");
SDValue Callee = DAG.getExternalSymbol(
FunctionName,
DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
/// Given a @llvm.call.preallocated.setup, return the corresponding
/// preallocated call.
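/// Illustrative IR (names hypothetical):
///   %t = call token @llvm.call.preallocated.setup(i32 1)
///   %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0)
///   call void @foo(i8* preallocated(i32) %a) ["preallocated"(token %t)]
/// Here the call to @foo is the one returned.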
static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
assert(cast<CallBase>(PreallocatedSetup)
->getCalledFunction()
->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
"expected call_preallocated_setup Value");
for (const auto *U : PreallocatedSetup->users()) {
auto *UseCall = cast<CallBase>(U);
const Function *Fn = UseCall->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
return UseCall;
}
}
llvm_unreachable("expected corresponding call to preallocated setup/arg");
}
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
switch (Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
return;
case Intrinsic::vscale: {
match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
}
case Intrinsic::vastart: visitVAStart(I); return;
case Intrinsic::vaend: visitVAEnd(I); return;
case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::addressofreturnaddress:
setValue(&I,
DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::sponentry:
setValue(&I,
DAG.getNode(ISD::SPONENTRY, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_volatile_register:
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::READ_REGISTER, sdl,
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
Value *RegValue = I.getArgOperand(1);
SDValue Chain = getRoot();
SDValue RegName =
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
return;
}
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memcpy defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemcpy(
Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memcpy_inline: {
const auto &MCI = cast<MemCpyInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size");
// @llvm.memcpy.inline defines 0 and 1 to both mean no alignment.
Align DstAlign = MCI.getDestAlign().valueOrOne();
Align SrcAlign = MCI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MCI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue MC = DAG.getMemcpy(
getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align Alignment = MSI.getDestAlign().valueOrOne();
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MS = DAG.getMemset(
Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false,
isTC, MachinePointerInfo(I.getArgOperand(0)), I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
case Intrinsic::memset_inline: {
const auto &MSII = cast<MemSetInlineInst>(I);
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Value = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
assert(isa<ConstantSDNode>(Size) && "memset_inline needs constant size");
// @llvm.memset defines 0 and 1 to both mean no alignment.
Align DstAlign = MSII.getDestAlign().valueOrOne();
bool isVol = MSII.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemset(Root, sdl, Dst, Value, Size, DstAlign, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
// @llvm.memmove defines 0 and 1 to both mean no alignment.
Align DstAlign = MMI.getDestAlign().valueOrOne();
Align SrcAlign = MMI.getSourceAlign().valueOrOne();
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = MMI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
I.getAAMetadata(), AA);
updateDAGForMaybeTailCall(MM);
return;
}
case Intrinsic::memcpy_element_unordered_atomic: {
const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC =
DAG.getAtomicMemcpy(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
isTC, MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memmove_element_unordered_atomic: {
auto &MI = cast<AtomicMemMoveInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC =
DAG.getAtomicMemmove(getRoot(), sdl, Dst, Src, Length, LengthTy, ElemSz,
isTC, MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<AtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
Type *LengthTy = MI.getLength()->getType();
unsigned ElemSz = MI.getElementSizeInBytes();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue MC =
DAG.getAtomicMemset(getRoot(), sdl, Dst, Val, Length, LengthTy, ElemSz,
isTC, MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
return;
}
case Intrinsic::call_preallocated_setup: {
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
getRoot(), SrcValue);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::call_preallocated_arg: {
const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
SDValue Ops[3];
Ops[0] = getRoot();
Ops[1] = SrcValue;
Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
MVT::i32); // arg index
SDValue Res = DAG.getNode(
ISD::PREALLOCATED_ARG, sdl,
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
    // Assume dbg.addr and dbg.declare cannot currently use DIArgList, i.e.
    // they are non-variadic.
const auto &DI = cast<DbgVariableIntrinsic>(I);
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
assert(Variable && "Missing variable");
LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI
<< "\n");
// Check if address has undef value.
const Value *Address = DI.getVariableLocationOp(0);
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (bad/undef/unused-arg address)\n");
return;
}
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
// Check if this variable can be described by a frame index, typically
// either as a static alloca or a byval parameter.
int FI = std::numeric_limits<int>::max();
if (const auto *AI =
dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
if (AI->isStaticAlloca()) {
auto I = FuncInfo.StaticAllocaMap.find(AI);
if (I != FuncInfo.StaticAllocaMap.end())
FI = I->second;
}
} else if (const auto *Arg = dyn_cast<Argument>(
Address->stripInBoundsConstantOffsets())) {
FI = FuncInfo.getArgumentFrameIndex(Arg);
}
// llvm.dbg.addr is control dependent and always generates indirect
// DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
// the MachineFunction variable table.
if (FI != std::numeric_limits<int>::max()) {
if (Intrinsic == Intrinsic::dbg_addr) {
SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
dl, SDNodeOrder);
DAG.AddDbgValue(SDV, isParameter);
} else {
LLVM_DEBUG(dbgs() << "Skipping " << DI
<< " (variable info stashed in MF side table)\n");
}
return;
}
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
SDV =
DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(),
/*IsIndirect*/ true, dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
FuncArgumentDbgValueKind::Declare, N);
return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
}
DAG.AddDbgValue(SDV, isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl,
FuncArgumentDbgValueKind::Declare, N)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI
<< " (could not emit func-arg dbg_value)\n");
}
}
return;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(I);
DILabel *Label = DI.getLabel();
assert(Label && "Missing label");
SDDbgLabel *SDV;
SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
DAG.AddDbgLabel(SDV);
return;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
SmallVector<Value *, 4> Values(DI.getValues());
if (Values.empty())
return;
if (llvm::is_contained(Values, nullptr))
return;
bool IsVariadic = DI.hasArgList();
if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(),
SDNodeOrder, IsVariadic))
addDanglingDebugInfo(&DI, dl, SDNodeOrder);
return;
}
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return;
}
case Intrinsic::eh_return_i32:
case Intrinsic::eh_return_i64:
DAG.getMachineFunction().setCallsEHReturn(true);
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
MVT::Other,
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
return;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
return;
case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(0));
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
return;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
AllocaInst *FnCtx =
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI.setFunctionContextIndex(FI);
return;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
return;
}
case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
return;
case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
return;
case Intrinsic::masked_gather:
visitMaskedGather(I);
return;
case Intrinsic::masked_load:
visitMaskedLoad(I);
return;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
return;
case Intrinsic::masked_store:
visitMaskedStore(I);
return;
case Intrinsic::masked_expandload:
visitMaskedLoad(I, true /* IsExpanding */);
return;
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
return;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log2:
setValue(&I,
expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::log10:
setValue(&I,
expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::exp2:
setValue(&I,
expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI, Flags));
return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, TLI, Flags));
return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::canonicalize: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
case Intrinsic::fabs: Opcode = ISD::FABS; break;
case Intrinsic::sin: Opcode = ISD::FSIN; break;
case Intrinsic::cos: Opcode = ISD::FCOS; break;
case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
case Intrinsic::rint: Opcode = ISD::FRINT; break;
case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
case Intrinsic::round: Opcode = ISD::FROUND; break;
case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break;
case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break;
}
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint: {
unsigned Opcode;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::lround: Opcode = ISD::LROUND; break;
case Intrinsic::llround: Opcode = ISD::LLROUND; break;
case Intrinsic::lrint: Opcode = ISD::LRINT; break;
case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
}
EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
getValue(I.getArgOperand(0))));
return;
}
case Intrinsic::minnum:
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maxnum:
setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), Flags));
return;
}
case Intrinsic::fma:
setValue(&I, DAG.getNode(
ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
return;
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
return;
#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
#include "llvm/IR/VPIntrinsics.def"
visitVectorPredicationIntrinsic(cast<VPIntrinsic>(I));
return;
case Intrinsic::fptrunc_round: {
    // Get the last argument, the rounding-mode metadata, and convert it to an
    // integer constant for the node.
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata();
Optional<RoundingMode> RoundMode =
convertStrToRoundingMode(cast<MDString>(MD)->getString());
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Propagate fast-math-flags from IR to node(s).
SDNodeFlags Flags;
Flags.copyFMF(*cast<FPMathOperator>(&I));
SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
SDValue Result;
Result = DAG.getNode(
ISD::FPTRUNC_ROUND, sdl, VT, getValue(I.getArgOperand(0)),
DAG.getTargetConstant((int)*RoundMode, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
setValue(&I, Result);
return;
}
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
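    // Lower to a single FMA node when fusion is allowed and profitable on
    // this target; otherwise fall back to separate FMUL + FADD, which the
    // fmuladd semantics permit.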
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)), Flags));
} else {
// TODO: Intrinsic calls should have fast-math-flags.
SDValue Mul = DAG.getNode(
ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags);
SDValue Add = DAG.getNode(ISD::FADD, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Mul, getValue(I.getArgOperand(2)), Flags);
setValue(&I, Add);
}
return;
}
case Intrinsic::convert_to_fp16:
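    // llvm.convert.to.fp16 rounds to half and returns the raw bits, hence
    // the FP_ROUND to f16 followed by a BITCAST to i16.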
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16,
getValue(I.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
MVT::i32))));
return;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
return;
case Intrinsic::fptosi_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_SINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::fptoui_sat: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::FP_TO_UINT_SAT, sdl, VT,
getValue(I.getArgOperand(0)),
DAG.getValueType(VT.getScalarType())));
return;
}
case Intrinsic::set_rounding:
Res = DAG.getNode(ISD::SET_ROUNDING, sdl, MVT::Other,
{getRoot(), getValue(I.getArgOperand(0))});
setValue(&I, Res);
DAG.setRoot(Res.getValue(0));
return;
case Intrinsic::is_fpclass: {
const DataLayout DLayout = DAG.getDataLayout();
EVT DestVT = TLI.getValueType(DLayout, I.getType());
EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
SDValue Op = getValue(I.getArgOperand(0));
SDNodeFlags Flags;
Flags.setNoFPExcept(
!F.getAttributes().hasFnAttr(llvm::Attribute::StrictFP));
    // If ISD::IS_FPCLASS should be expanded, do it right now, because the
    // expansion can use illegal types. Expanding early allows these types to
    // be legalized prior to selection.
if (!TLI.isOperationLegalOrCustom(ISD::IS_FPCLASS, ArgVT)) {
SDValue Result = TLI.expandIS_FPCLASS(DestVT, Op, Test, Flags, sdl, DAG);
setValue(&I, Result);
return;
}
SDValue Check = DAG.getTargetConstant(Test, sdl, MVT::i32);
SDValue V = DAG.getNode(ISD::IS_FPCLASS, sdl, DestVT, {Op, Check}, Flags);
setValue(&I, V);
return;
}
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
return;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
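    // The second operand is the i1 "is zero poison" flag; when it is set, a
    // zero input is poison, so the cheaper CTTZ_ZERO_UNDEF form can be used.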
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
return;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
return;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
bool IsFSHL = Intrinsic == Intrinsic::fshl;
SDValue X = getValue(I.getArgOperand(0));
SDValue Y = getValue(I.getArgOperand(1));
SDValue Z = getValue(I.getArgOperand(2));
EVT VT = X.getValueType();
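    // A funnel shift of a value with itself is a rotate:
    // fshl(x, x, z) == rotl(x, z) and fshr(x, x, z) == rotr(x, z).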
if (X == Y) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
} else {
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
}
return;
}
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::uadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ssub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::usub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::sshl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::ushl_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USHLSAT, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smul_fix:
case Intrinsic::umul_fix:
case Intrinsic::smul_fix_sat:
case Intrinsic::umul_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1.getValueType(), Op1, Op2, Op3));
return;
}
case Intrinsic::sdiv_fix:
case Intrinsic::udiv_fix:
case Intrinsic::sdiv_fix_sat:
case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
setValue(&I, expandDivFix(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
Op1, Op2, Op3, DAG, TLI));
return;
}
case Intrinsic::smax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::smin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umax: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMAX, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::umin: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UMIN, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::abs: {
// TODO: Preserve "int min is poison" arg in SDAG?
SDValue Op1 = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::ABS, sdl, Op1.getValueType(), Op1));
return;
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
    // The result type of @llvm.get.dynamic.area.offset must not be wider
    // than the target's pointer type.
if (PtrTy.getFixedSizeInBits() < ResTy.getFixedSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
Op);
DAG.setRoot(Op);
setValue(&I, Res);
return;
}
case Intrinsic::stackguard: {
MachineFunction &MF = DAG.getMachineFunction();
const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
} else {
EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
const Value *Global = TLI.getSDagStackGuard(M);
Align Align = DAG.getDataLayout().getPrefTypeAlign(Global->getType());
Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global),
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
}
if (TLI.useStackGuardXorFP())
Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
return;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
SDValue Src, Chain = getRoot();
if (TLI.useLoadStackGuardNode())
Src = getLoadStackGuard(DAG, sdl, Chain);
else
Src = getValue(I.getArgOperand(0)); // The guard's value.
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI.setStackProtectorIndex(FI);
EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout());
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
// Store the stack protector onto the stack.
Res = DAG.getStore(
Chain, sdl, Src, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MaybeAlign(), MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
return;
}
case Intrinsic::objectsize:
llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return;
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, noalias scope declarations, assumptions, and
// artificial side-effects.
return;
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
MachineFunction &MF = DAG.getMachineFunction();
MCSymbol *Label =
MF.getMMI().getContext().createTempSymbol("annotation", true);
Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
DAG.setRoot(Res);
return;
}
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
SDValue Ops[6];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
return;
}
case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::gcroot: {
assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
assert(GFI && "implied by previous");
const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
return;
}
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot());
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
return;
case Intrinsic::ubsantrap:
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
DAG.setRoot(DAG.getNode(ISD::TRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::debugtrap:
DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, sdl, MVT::Other, getRoot()));
break;
case Intrinsic::ubsantrap:
DAG.setRoot(DAG.getNode(
ISD::UBSANTRAP, sdl, MVT::Other, getRoot(),
DAG.getTargetConstant(
cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(), sdl,
MVT::i32)));
break;
default: llvm_unreachable("unknown trap intrinsic");
}
return;
}
TargetLowering::ArgListTy Args;
if (Intrinsic == Intrinsic::ubsantrap) {
Args.push_back(TargetLoweringBase::ArgListEntry());
Args[0].Val = I.getArgOperand(0);
Args[0].Node = getValue(Args[0].Val);
Args[0].Ty = Args[0].Val->getType();
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee(
CallingConv::C, I.getType(),
DAG.getExternalSymbol(TrapFuncName.data(),
TLI.getPointerTy(DAG.getDataLayout())),
std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
return;
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow: {
ISD::NodeType Op;
switch (Intrinsic) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
}
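    // These nodes produce a pair {result, overflow flag}; for vector inputs
    // the overflow flag is an i1 vector with the same element count.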
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
EVT ResultVT = Op1.getValueType();
EVT OverflowVT = MVT::i1;
if (ResultVT.isVector())
OverflowVT = EVT::getVectorVT(
*Context, OverflowVT, ResultVT.getVectorElementCount());
SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
return;
}
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
    auto Flags = rw == 0 ? MachineMemOperand::MOLoad
                         : MachineMemOperand::MOStore;
Ops[0] = DAG.getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
SDValue Result = DAG.getMemIntrinsicNode(
ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)),
/* align */ None, Flags);
    // Chain the prefetch in parallel with any pending loads, to stay out of
    // the way of later optimizations.
PendingLoads.push_back(Result);
Result = getRoot();
DAG.setRoot(Result);
return;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
return;
const int64_t ObjectSize =
cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
Value *const ObjectPtr = I.getArgOperand(1);
SmallVector<const Value *, 4> Allocas;
getUnderlyingObjects(ObjectPtr, Allocas);
for (const Value *Alloca : Allocas) {
const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(Alloca);
// Could not find an Alloca.
if (!LifetimeObject)
continue;
// First check that the Alloca is static, otherwise it won't have a
// valid frame index.
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
if (SI == FuncInfo.StaticAllocaMap.end())
return;
const int FrameIndex = SI->second;
int64_t Offset;
if (GetPointerBaseWithConstantOffset(
ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
Offset = -1; // Cannot determine offset from alloca to lifetime object.
Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
Offset);
DAG.setRoot(Res);
}
return;
}
case Intrinsic::pseudoprobe: {
auto Guid = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue();
auto Index = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
auto Attr = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
Res = DAG.getPseudoProbeNode(sdl, getRoot(), Guid, Index, Attr);
DAG.setRoot(Res);
return;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I,
DAG.getUNDEF(TLI.getValueType(DAG.getDataLayout(), I.getType())));
return;
case Intrinsic::invariant_end:
// Discard region information.
return;
case Intrinsic::clear_cache:
    // FunctionName may be null.
if (const char *FunctionName = TLI.getClearCacheBuiltinName())
lowerCallToExternalSymbol(I, FunctionName);
return;
case Intrinsic::donothing:
case Intrinsic::seh_try_begin:
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
// ignore
return;
case Intrinsic::experimental_stackmap:
visitStackmap(I);
return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(I);
return;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(cast<GCStatepointInst>(I));
return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
return;
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return;
case Intrinsic::instrprof_cover:
llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
AllocaInst *Slot = cast<AllocaInst>(Arg);
assert(FuncInfo.StaticAllocaMap.count(Slot) &&
"can only escape static allocas");
int FI = FuncInfo.StaticAllocaMap[Slot];
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(MF.getName()), Idx);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl,
TII->get(TargetOpcode::LOCAL_ESCAPE))
.addSym(FrameAllocSym)
.addFrameIndex(FI);
}
return;
}
case Intrinsic::localrecover: {
// i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx)
MachineFunction &MF = DAG.getMachineFunction();
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
unsigned IdxVal =
unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
Value *FP = I.getArgOperand(1);
SDValue FPVal = getValue(FP);
EVT PtrVT = FPVal.getValueType();
// Create a MCSymbol for the label to avoid any target lowering
// that would make this PC relative.
SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT);
SDValue OffsetVal =
DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym);
// Add the offset to the FP.
SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl);
setValue(&I, Add);
return;
}
case Intrinsic::eh_exceptionpointer:
case Intrinsic::eh_exceptioncode: {
// Get the exception pointer vreg, copy from it, and resize it to fit.
const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0));
MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT);
unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC);
SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), sdl, VReg, PtrVT);
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, sdl, MVT::i32);
setValue(&I, N);
return;
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
SDValue LogEntryVal = getValue(I.getArgOperand(0));
SDValue StrSizeVal = getValue(I.getArgOperand(1));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
    // We need to enforce the calling convention for the callsite, so that
    // argument ordering is enforced correctly, and so that register
    // allocation can see that some registers may be assumed clobbered and
    // must be preserved across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
// specific calling convention, and only for x86_64.
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
// We want to say that we always want the arguments in registers.
// It's unclear to me how manipulating the selection DAG here forces callers
// to provide arguments in registers instead of on the stack.
SDValue LogTypeId = getValue(I.getArgOperand(0));
SDValue LogEntryVal = getValue(I.getArgOperand(1));
SDValue StrSizeVal = getValue(I.getArgOperand(2));
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Chain = getRoot();
Ops.push_back(LogTypeId);
Ops.push_back(LogEntryVal);
Ops.push_back(StrSizeVal);
Ops.push_back(Chain);
    // We need to enforce the calling convention for the callsite, so that
    // argument ordering is enforced correctly, and so that register
    // allocation can see that some registers may be assumed clobbered and
    // must be preserved across calls to the intrinsic.
MachineSDNode *MN = DAG.getMachineNode(
TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, sdl, NodeTys, Ops);
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
return;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
return;
case Intrinsic::experimental_stepvector:
visitStepVector(I);
return;
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul:
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
case Intrinsic::vector_reduce_and:
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return;
case Intrinsic::icall_branch_funnel: {
SmallVector<SDValue, 16> Ops;
Ops.push_back(getValue(I.getArgOperand(0)));
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(1), Offset, DAG.getDataLayout()));
if (!Base)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Ops.push_back(DAG.getTargetGlobalAddress(Base, sdl, MVT::i64, 0));
struct BranchFunnelTarget {
int64_t Offset;
SDValue Target;
};
SmallVector<BranchFunnelTarget, 8> Targets;
for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
report_fatal_error("all llvm.icall.branch.funnel operands must refer "
"to the same GlobalValue");
SDValue Val = getValue(I.getArgOperand(Op + 1));
auto *GA = dyn_cast<GlobalAddressSDNode>(Val);
if (!GA)
report_fatal_error(
"llvm.icall.branch.funnel operand must be a GlobalValue");
Targets.push_back({Offset, DAG.getTargetGlobalAddress(
GA->getGlobal(), sdl, Val.getValueType(),
GA->getOffset())});
}
llvm::sort(Targets,
[](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) {
return T1.Offset < T2.Offset;
});
for (auto &T : Targets) {
Ops.push_back(DAG.getTargetConstant(T.Offset, sdl, MVT::i32));
Ops.push_back(T.Target);
}
Ops.push_back(DAG.getRoot()); // Chain
SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, sdl,
MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
HasTailCall = true;
return;
}
case Intrinsic::wasm_landingpad_index:
    // The information this intrinsic contained has been transferred to
    // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
    // delete it now.
return;
case Intrinsic::aarch64_settag:
case Intrinsic::aarch64_settag_zero: {
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
SDValue Val = TSI.EmitTargetCodeForSetTag(
DAG, sdl, getRoot(), getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
ZeroMemory);
DAG.setRoot(Val);
setValue(&I, Val);
return;
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
SDValue Const = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
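    // llvm.ptrmask lowers to a plain AND; the mask is zero-extended or
    // truncated to the pointer width first.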
setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
return;
}
case Intrinsic::get_active_lane_mask: {
EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Index = getValue(I.getOperand(0));
EVT ElementVT = Index.getValueType();
if (!TLI.shouldExpandGetActiveLaneMask(CCVT, ElementVT)) {
visitTargetIntrinsic(I, Intrinsic);
return;
}
SDValue TripCount = getValue(I.getOperand(1));
auto VecTy = CCVT.changeVectorElementType(ElementVT);
SDValue VectorIndex, VectorTripCount;
if (VecTy.isScalableVector()) {
VectorIndex = DAG.getSplatVector(VecTy, sdl, Index);
VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount);
} else {
VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index);
VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount);
}
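// Compute lanemask[i] = (Index + i) u< TripCount. The saturating add
// clamps at the type's maximum instead of wrapping, so the unsigned
// compare still yields false for lanes past the trip count.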
SDValue VectorStep = DAG.getStepVector(sdl, VecTy);
SDValue VectorInduction = DAG.getNode(
ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep);
SDValue SetCC = DAG.getSetCC(sdl, CCVT, VectorInduction,
VectorTripCount, ISD::CondCode::SETULT);
setValue(&I, SetCC);
return;
}
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
SDValue Index = getValue(I.getOperand(2));
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, ResultVT, Vec, SubVec,
Index));
return;
}
case Intrinsic::vector_extract: {
SDValue Vec = getValue(I.getOperand(0));
SDValue Index = getValue(I.getOperand(1));
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
// The intrinsic's index type is i64, but the SDNode requires an index type
// suitable for the target. Convert the index as required.
MVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
if (Index.getValueType() != VectorIdxTy)
Index = DAG.getVectorIdxConstant(
cast<ConstantSDNode>(Index)->getZExtValue(), sdl);
setValue(&I,
DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
visitVectorReverse(I);
return;
case Intrinsic::experimental_vector_splice:
visitVectorSplice(I);
return;
}
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
const ConstrainedFPIntrinsic &FPI) {
SDLoc sdl = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs);
ValueVTs.push_back(MVT::Other); // Out chain
// We do not need to serialize constrained FP intrinsics against
// each other or against (nonvolatile) loads, so they can be
// chained like loads.
SDValue Chain = DAG.getRoot();
SmallVector<SDValue, 4> Opers;
Opers.push_back(Chain);
if (FPI.isUnaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
} else if (FPI.isTernaryOp()) {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
Opers.push_back(getValue(FPI.getArgOperand(2)));
} else {
Opers.push_back(getValue(FPI.getArgOperand(0)));
Opers.push_back(getValue(FPI.getArgOperand(1)));
}
auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) {
assert(Result.getNode()->getNumValues() == 2);
// Push node to the appropriate list so that future instructions can be
// chained up correctly.
SDValue OutChain = Result.getValue(1);
switch (EB) {
case fp::ExceptionBehavior::ebIgnore:
// The only reason why ebIgnore nodes still need to be chained is that
// they might depend on the current rounding mode, and therefore must
// not be moved across instructions that may change that mode.
LLVM_FALLTHROUGH;
case fp::ExceptionBehavior::ebMayTrap:
// These must not be moved across calls or instructions that may change
// floating-point exception masks.
PendingConstrainedFP.push_back(OutChain);
break;
case fp::ExceptionBehavior::ebStrict:
// These must not be moved across calls or instructions that may change
// floating-point exception masks or read floating-point exception flags.
// In addition, they cannot be optimized out even if unused.
PendingConstrainedFPStrict.push_back(OutChain);
break;
}
};
SDVTList VTs = DAG.getVTList(ValueVTs);
fp::ExceptionBehavior EB = *FPI.getExceptionBehavior();
SDNodeFlags Flags;
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags.setNoFPExcept(true);
if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI))
Flags.copyFMF(*FPOp);
unsigned Opcode;
switch (FPI.getIntrinsicID()) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case Intrinsic::INTRINSIC: \
Opcode = ISD::STRICT_##DAGN; \
break;
#include "llvm/IR/ConstrainedOps.def"
case Intrinsic::experimental_constrained_fmuladd: {
Opcode = ISD::STRICT_FMA;
// Break fmuladd into fmul and fadd.
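// If strict fusion rules forbid it, or an FMA would not be faster than a
// separate multiply and add, emit a STRICT_FMUL and feed the product into
// a STRICT_FADD, threading the FMUL's output chain into the FADD.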
if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict ||
!TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(),
ValueVTs[0])) {
Opers.pop_back();
SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags);
pushOutChain(Mul, EB);
Opcode = ISD::STRICT_FADD;
Opers.clear();
Opers.push_back(Mul.getValue(1));
Opers.push_back(Mul.getValue(0));
Opers.push_back(getValue(FPI.getArgOperand(2)));
}
break;
}
}
// A few strict DAG nodes carry additional operands that are not
// set up by the default code above.
switch (Opcode) {
default: break;
case ISD::STRICT_FP_ROUND:
Opers.push_back(
DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())));
break;
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: {
auto *FPCmp = dyn_cast<ConstrainedFPCmpIntrinsic>(&FPI);
ISD::CondCode Condition = getFCmpCondCode(FPCmp->getPredicate());
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
Opers.push_back(DAG.getCondCode(Condition));
break;
}
}
SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags);
pushOutChain(Result, EB);
SDValue FPResult = Result.getValue(0);
setValue(&FPI, FPResult);
}
static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
Optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \
case Intrinsic::VPID: \
ResOPC = ISD::VPSD; \
break;
#include "llvm/IR/VPIntrinsics.def"
}
if (!ResOPC)
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
*ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
if (VPIntrin.getFastMathFlags().allowReassoc())
return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
: ISD::VP_REDUCE_FMUL;
}
return *ResOPC;
}
void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
SmallVector<SDValue, 7> &OpValues,
bool IsGather) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
bool AddToChain = true;
if (!IsGather) {
// Do not serialize variable-length loads of constant memory with
// anything.
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
MMO, false /*IsExpanding */);
} else {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
this, VPIntrin.getParent(),
VT.getScalarStoreSize());
if (!UniformBase) {
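// No single scalar base pointer was found: gather straight through the
// pointer vector, using a zero base, the pointers themselves as indices,
// and a unit scale.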
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
}
LD = DAG.getGatherVP(
DAG.getVTList(VT, MVT::Other), VT, DL,
{DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
IndexType);
}
if (AddToChain)
PendingLoads.push_back(LD.getValue(1));
setValue(&VPIntrin, LD);
}
void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
SmallVector<SDValue, 7> &OpValues,
bool IsScatter) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
SDValue ST;
if (!IsScatter) {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
SDValue Ptr = OpValues[1];
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset,
OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED,
/* IsTruncating */ false, /*IsCompressing*/ false);
} else {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
unsigned AS =
PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(AS), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
SDValue Base, Index, Scale;
ISD::MemIndexType IndexType;
bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
this, VPIntrin.getParent(),
VT.getScalarStoreSize());
if (!UniformBase) {
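// As with the gather case above: no single scalar base pointer, so
// scatter through the pointer vector with a zero base and unit scale.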
Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(PtrOperand);
IndexType = ISD::SIGNED_SCALED;
Scale =
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
}
EVT IdxVT = Index.getValueType();
EVT EltTy = IdxVT.getVectorElementType();
if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
}
ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
{getMemoryRoot(), OpValues[0], Base, Index, Scale,
OpValues[2], OpValues[3]},
MMO, IndexType);
}
DAG.setRoot(ST);
setValue(&VPIntrin, ST);
}
void SelectionDAGBuilder::visitVPStridedLoad(
const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
SDValue LD = DAG.getStridedLoadVP(VT, DL, InChain, OpValues[0], OpValues[1],
OpValues[2], OpValues[3], MMO,
false /*IsExpanding*/);
if (AddToChain)
PendingLoads.push_back(LD.getValue(1));
setValue(&VPIntrin, LD);
}
void SelectionDAGBuilder::visitVPStridedStore(
const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
if (!Alignment)
Alignment = DAG.getEVTAlign(VT.getScalarType());
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
MemoryLocation::UnknownSize, *Alignment, AAInfo);
SDValue ST = DAG.getStridedStoreVP(
getMemoryRoot(), DL, OpValues[0], OpValues[1],
DAG.getUNDEF(OpValues[1].getValueType()), OpValues[2], OpValues[3],
OpValues[4], VT, MMO, ISD::UNINDEXED, /*IsTruncating*/ false,
/*IsCompressing*/ false);
DAG.setRoot(ST);
setValue(&VPIntrin, ST);
}
void SelectionDAGBuilder::visitVPCmp(const VPCmpIntrinsic &VPIntrin) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL = getCurSDLoc();
ISD::CondCode Condition;
CmpInst::Predicate CondCode = VPIntrin.getPredicate();
bool IsFP = VPIntrin.getOperand(0)->getType()->isFPOrFPVectorTy();
if (IsFP) {
// FIXME: Regular fcmps are FPMathOperators which may have fast-math (nnan)
// flags, but calls that don't return floating-point types can't be
// FPMathOperators, like vp.fcmp. This affects constrained fcmp too.
Condition = getFCmpCondCode(CondCode);
if (TM.Options.NoNaNsFPMath)
Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = getICmpCondCode(CondCode);
}
SDValue Op1 = getValue(VPIntrin.getOperand(0));
SDValue Op2 = getValue(VPIntrin.getOperand(1));
// Operand #2 is the condition code (consumed above via getPredicate()).
SDValue MaskOp = getValue(VPIntrin.getOperand(3));
SDValue EVL = getValue(VPIntrin.getOperand(4));
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
"Unexpected target EVL type");
EVL = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, EVL);
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
VPIntrin.getType());
setValue(&VPIntrin,
DAG.getSetCCVP(DL, DestVT, Op1, Op2, Condition, MaskOp, EVL));
}
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
unsigned Opcode = getISDForVPIntrinsic(VPIntrin);
auto IID = VPIntrin.getIntrinsicID();
if (const auto *CmpI = dyn_cast<VPCmpIntrinsic>(&VPIntrin))
return visitVPCmp(*CmpI);
SmallVector<EVT, 4> ValueVTs;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
ComputeValueVTs(TLI, DAG.getDataLayout(), VPIntrin.getType(), ValueVTs);
SDVTList VTs = DAG.getVTList(ValueVTs);
auto EVLParamPos = VPIntrinsic::getVectorLengthParamPos(IID);
MVT EVLParamVT = TLI.getVPExplicitVectorLengthTy();
assert(EVLParamVT.isScalarInteger() && EVLParamVT.bitsGE(MVT::i32) &&
"Unexpected target EVL type");
// Request operands.
SmallVector<SDValue, 7> OpValues;
for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
switch (Opcode) {
default: {
SDNodeFlags SDFlags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin))
SDFlags.copyFMF(*FPMO);
SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues, SDFlags);
setValue(&VPIntrin, Result);
break;
}
case ISD::VP_LOAD:
case ISD::VP_GATHER:
visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
Opcode == ISD::VP_GATHER);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues);
break;
case ISD::VP_STORE:
case ISD::VP_SCATTER:
visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
break;
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
visitVPStridedStore(VPIntrin, OpValues);
break;
}
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
const BasicBlock *EHPadBB,
MCSymbol *&BeginLabel) {
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label before the invoke call to mark the try range. This can be
// used to detect deletion of the invoke via the MachineModuleInfo.
BeginLabel = MMI.getContext().createTempSymbol();
// For SjLj, keep track of which landing pads go with which invokes
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
MMI.setCurrentCallSite(0);
}
return DAG.getEHLabel(getCurSDLoc(), Chain, BeginLabel);
}
SDValue SelectionDAGBuilder::lowerEndEH(SDValue Chain, const InvokeInst *II,
const BasicBlock *EHPadBB,
MCSymbol *BeginLabel) {
assert(BeginLabel && "BeginLabel should've been set");
MachineFunction &MF = DAG.getMachineFunction();
MachineModuleInfo &MMI = MF.getMMI();
// Insert a label at the end of the invoke call to mark the try range. This
// can be used to detect deletion of the invoke via the MachineModuleInfo.
MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
Chain = DAG.getEHLabel(getCurSDLoc(), Chain, EndLabel);
// Inform MachineModuleInfo of range.
auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
// Some platforms (e.g. wasm) use funclet-style IR but do not actually use
// outlined funclets or their style of LSDA info.
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) {
assert(II && "II should've been set");
WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo();
EHInfo->addIPToStateRange(II, BeginLabel, EndLabel);
} else if (!isScopedEHPersonality(Pers)) {
assert(EHPadBB);
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
return Chain;
}
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
MCSymbol *BeginLabel = nullptr;
if (EHPadBB) {
// Both PendingLoads and PendingExports must be flushed here;
// this call might not return.
(void)getRoot();
DAG.setRoot(lowerStartEH(getControlRoot(), EHPadBB, BeginLabel));
CLI.setChain(getRoot());
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
assert((CLI.IsTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
"Null value expected with tail call!");
if (!Result.second.getNode()) {
// As a special case, a null chain means that a tail call has been emitted
// and the DAG root is already updated.
HasTailCall = true;
// Since there's no actual continuation from this block, nothing can rely
// on us setting vregs for any of its values.
PendingExports.clear();
} else {
DAG.setRoot(Result.second);
}
if (EHPadBB) {
DAG.setRoot(lowerEndEH(getRoot(), cast_or_null<InvokeInst>(CLI.CB), EHPadBB,
BeginLabel));
}
return Result;
}
void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
bool isTailCall,
bool isMustTailCall,
const BasicBlock *EHPadBB) {
auto &DL = DAG.getDataLayout();
FunctionType *FTy = CB.getFunctionType();
Type *RetTy = CB.getType();
TargetLowering::ArgListTy Args;
Args.reserve(CB.arg_size());
const Value *SwiftErrorVal = nullptr;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (isTailCall) {
// Avoid emitting tail calls in functions with the disable-tail-calls
// attribute.
auto *Caller = CB.getParent()->getParent();
if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() ==
"true" && !isMustTailCall)
isTailCall = false;
// We can't tail call inside a function with a swifterror argument. Lowering
// does not support this yet; the value would have to be moved into the
// swifterror register before the call.
if (TLI.supportSwiftError() &&
Caller->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
isTailCall = false;
}
for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) {
TargetLowering::ArgListEntry Entry;
const Value *V = *I;
// Skip empty types
if (V->getType()->isEmptyTy())
continue;
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode; Entry.Ty = V->getType();
Entry.setAttributes(&CB, I - CB.arg_begin());
// Use swifterror virtual register as input to the call.
if (Entry.IsSwiftError && TLI.supportSwiftError()) {
SwiftErrorVal = V;
// Find the virtual register for the actual swifterror argument and use
// it, rather than the Value itself, as the call input.
Entry.Node =
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V),
EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
// If we have an explicit sret argument that is an Instruction (i.e., it
// might point to function-local memory), we can't meaningfully tail-call.
if (Entry.IsSRet && isa<Instruction>(V))
isTailCall = false;
}
// If call site has a cfguardtarget operand bundle, create and add an
// additional ArgListEntry.
if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) {
TargetLowering::ArgListEntry Entry;
Value *V = Bundle->Inputs[0];
SDValue ArgNode = getValue(V);
Entry.Node = ArgNode;
Entry.Ty = V->getType();
Entry.IsCFGuardTarget = true;
Args.push_back(Entry);
}
// Check if target-independent constraints permit a tail call here.
// Target-dependent constraints are checked within TLI->LowerCallTo.
if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget()))
isTailCall = false;
// Disable tail calls if there is a swifterror argument. Targets have not
// been updated to support tail calls.
if (TLI.supportSwiftError() && SwiftErrorVal)
isTailCall = false;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(RetTy, FTy, Callee, std::move(Args), CB)
.setTailCall(isTailCall)
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first);
setValue(&CB, Result.first);
}
// The last element of CLI.InVals has the SDValue for swifterror return.
// Here we copy it to a virtual register and update SwiftErrorMap for
// book-keeping.
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
Register VReg =
SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
}
}
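/// Load a value of type \p LoadVT from \p PtrVal for use in a memcmp
/// expansion, constant-folding the load when the source is known-constant
/// memory such as a string literal.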
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SelectionDAGBuilder &Builder) {
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
// Cast pointer to the type we really want to load.
Type *LoadTy =
Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits());
if (LoadVT.isVector())
LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements());
LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
PointerType::getUnqual(LoadTy));
if (const Constant *LoadCst =
ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
LoadTy, Builder.DAG.getDataLayout()))
return Builder.getValue(LoadCst);
}
// Otherwise, we have to emit the load. If the pointer is to unfoldable but
// still constant memory, the input chain can be the entry node.
SDValue Root;
bool ConstantMemory = false;
// Do not serialize (non-volatile) loads of constant memory with anything.
if (Builder.AA && Builder.AA->pointsToConstantMemory(PtrVal)) {
Root = Builder.DAG.getEntryNode();
ConstantMemory = true;
} else {
// Do not serialize non-volatile loads against each other.
Root = Builder.DAG.getRoot();
}
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal =
Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr,
MachinePointerInfo(PtrVal), Align(1));
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
return LoadVal;
}
/// Record the value for an instruction that produces an integer result,
/// converting the type where necessary.
void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
SDValue Value,
bool IsSigned) {
EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
if (IsSigned)
Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
else
Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
setValue(&I, Value);
}
/// See if we can lower a memcmp/bcmp call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemCmpBCmpCall(const CallInst &I) {
const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
const Value *Size = I.getArgOperand(2);
const ConstantSDNode *CSize = dyn_cast<ConstantSDNode>(getValue(Size));
if (CSize && CSize->getZExtValue() == 0) {
EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT));
return true;
}
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res = TSI.EmitTargetCodeForMemcmp(
DAG, getCurSDLoc(), DAG.getRoot(), getValue(LHS), getValue(RHS),
getValue(Size), MachinePointerInfo(LHS), MachinePointerInfo(RHS));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
if (!CSize || !isOnlyUsedInZeroEqualityComparison(&I))
return false;
// If the target has a fast compare for the given size, it will return a
// preferred load type for that size. Require that the load VT is legal and
// that the target supports unaligned loads of that type. Otherwise, return
// INVALID.
auto hasFastLoadsAndCompare = [&](unsigned NumBits) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MVT LVT = TLI.hasFastEqualityCompare(NumBits);
if (LVT != MVT::INVALID_SIMPLE_VALUE_TYPE) {
// TODO: Handle 5 byte compare as 4-byte + 1 byte.
// TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
// TODO: Check alignment of src and dest ptrs.
unsigned DstAS = LHS->getType()->getPointerAddressSpace();
unsigned SrcAS = RHS->getType()->getPointerAddressSpace();
if (!TLI.isTypeLegal(LVT) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, SrcAS) ||
!TLI.allowsMisalignedMemoryAccesses(LVT, DstAS))
LVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
}
return LVT;
};
// This turns into unaligned loads. We only do this if the target natively
// supports the MVT we'll be loading or if it is small enough (<= 4) that
// we'll only produce a small number of byte loads.
MVT LoadVT;
unsigned NumBitsToCompare = CSize->getZExtValue() * 8;
switch (NumBitsToCompare) {
default:
return false;
case 16:
LoadVT = MVT::i16;
break;
case 32:
LoadVT = MVT::i32;
break;
case 64:
case 128:
case 256:
LoadVT = hasFastLoadsAndCompare(NumBitsToCompare);
break;
}
if (LoadVT == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
SDValue LoadL = getMemCmpLoad(LHS, LoadVT, *this);
SDValue LoadR = getMemCmpLoad(RHS, LoadVT, *this);
// Bitcast to a wide integer type if the loads are vectors.
if (LoadVT.isVector()) {
EVT CmpVT = EVT::getIntegerVT(LHS->getContext(), LoadVT.getSizeInBits());
LoadL = DAG.getBitcast(CmpVT, LoadL);
LoadR = DAG.getBitcast(CmpVT, LoadR);
}
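// The i1 SETNE result is widened (zero-extended) to the call's return type
// by processIntegerCallValue; discarding memcmp's ordering information is
// safe because this path is only taken for zero-equality comparisons,
// which was checked above.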
SDValue Cmp = DAG.getSetCC(getCurSDLoc(), MVT::i1, LoadL, LoadR, ISD::SETNE);
processIntegerCallValue(I, Cmp, false);
return true;
}
/// See if we can lower a memchr call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
const Value *Src = I.getArgOperand(0);
const Value *Char = I.getArgOperand(1);
const Value *Length = I.getArgOperand(2);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Src), getValue(Char), getValue(Length),
MachinePointerInfo(Src));
if (Res.first.getNode()) {
setValue(&I, Res.first);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a mempcpy call into an optimized form. If so, return
/// true and lower it. Otherwise return false, and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
SDValue Dst = getValue(I.getArgOperand(0));
SDValue Src = getValue(I.getArgOperand(1));
SDValue Size = getValue(I.getArgOperand(2));
Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne();
Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne();
// DAG::getMemcpy needs Alignment to be defined.
Align Alignment = std::min(DstAlign, SrcAlign);
bool isVol = false;
SDLoc sdl = getCurSDLoc();
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
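// That is, mempcpy(dst, src, n) behaves like memcpy but returns dst + n,
// which is materialized below once the copy has been emitted.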
SDValue Root = isVol ? getRoot() : getMemoryRoot();
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
// Check if Size needs to be truncated or extended.
Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
// Adjust return pointer to point just past the last dst byte.
SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
Dst, Size);
setValue(&I, DstPlusSize);
return true;
}
/// See if we can lower a strcpy call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1), isStpcpy);
if (Res.first.getNode()) {
setValue(&I, Res.first);
DAG.setRoot(Res.second);
return true;
}
return false;
}
/// See if we can lower a strcmp call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a strnlen call into an optimized form. If so, return
/// true and lower it, otherwise return false and it will be lowered like a
/// normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) {
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, false);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// See if we can lower a unary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it, otherwise return
/// false and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I,
DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp, Flags));
return true;
}
/// See if we can lower a binary floating-point operation into an SDNode with
/// the specified Opcode. If so, return true and lower it. Otherwise return
/// false, and it will be lowered like a normal call.
/// The caller already checked that \p I calls the appropriate LibFunc with a
/// correct prototype.
bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I,
unsigned Opcode) {
// We already checked this call's prototype; verify it doesn't modify errno.
if (!I.onlyReadsMemory())
return false;
SDNodeFlags Flags;
Flags.copyFMF(cast<FPMathOperator>(I));
SDValue Tmp0 = getValue(I.getArgOperand(0));
SDValue Tmp1 = getValue(I.getArgOperand(1));
EVT VT = Tmp0.getValueType();
setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1, Flags));
return true;
}
void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Handle inline assembly differently.
if (I.isInlineAsm()) {
visitInlineAsm(I);
return;
}
if (Function *F = I.getCalledFunction()) {
diagnoseDontCall(I);
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
if (!IID)
if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo())
IID = II->getIntrinsicID(F);
if (IID) {
visitIntrinsicCall(I, IID);
return;
}
}
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
// some reason or the call site requires strict floating point semantics.
LibFunc Func;
if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
F->hasName() && LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
case LibFunc_bcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_copysign:
case LibFunc_copysignf:
case LibFunc_copysignl:
// We already checked this call's prototype; verify it doesn't modify
// errno.
if (I.onlyReadsMemory()) {
SDValue LHS = getValue(I.getArgOperand(0));
SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
break;
case LibFunc_fabs:
case LibFunc_fabsf:
case LibFunc_fabsl:
if (visitUnaryFloatCall(I, ISD::FABS))
return;
break;
case LibFunc_fmin:
case LibFunc_fminf:
case LibFunc_fminl:
if (visitBinaryFloatCall(I, ISD::FMINNUM))
return;
break;
case LibFunc_fmax:
case LibFunc_fmaxf:
case LibFunc_fmaxl:
if (visitBinaryFloatCall(I, ISD::FMAXNUM))
return;
break;
case LibFunc_sin:
case LibFunc_sinf:
case LibFunc_sinl:
if (visitUnaryFloatCall(I, ISD::FSIN))
return;
break;
case LibFunc_cos:
case LibFunc_cosf:
case LibFunc_cosl:
if (visitUnaryFloatCall(I, ISD::FCOS))
return;
break;
case LibFunc_sqrt:
case LibFunc_sqrtf:
case LibFunc_sqrtl:
case LibFunc_sqrt_finite:
case LibFunc_sqrtf_finite:
case LibFunc_sqrtl_finite:
if (visitUnaryFloatCall(I, ISD::FSQRT))
return;
break;
case LibFunc_floor:
case LibFunc_floorf:
case LibFunc_floorl:
if (visitUnaryFloatCall(I, ISD::FFLOOR))
return;
break;
case LibFunc_nearbyint:
case LibFunc_nearbyintf:
case LibFunc_nearbyintl:
if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
return;
break;
case LibFunc_ceil:
case LibFunc_ceilf:
case LibFunc_ceill:
if (visitUnaryFloatCall(I, ISD::FCEIL))
return;
break;
case LibFunc_rint:
case LibFunc_rintf:
case LibFunc_rintl:
if (visitUnaryFloatCall(I, ISD::FRINT))
return;
break;
case LibFunc_round:
case LibFunc_roundf:
case LibFunc_roundl:
if (visitUnaryFloatCall(I, ISD::FROUND))
return;
break;
case LibFunc_trunc:
case LibFunc_truncf:
case LibFunc_truncl:
if (visitUnaryFloatCall(I, ISD::FTRUNC))
return;
break;
case LibFunc_log2:
case LibFunc_log2f:
case LibFunc_log2l:
if (visitUnaryFloatCall(I, ISD::FLOG2))
return;
break;
case LibFunc_exp2:
case LibFunc_exp2f:
case LibFunc_exp2l:
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
break;
case LibFunc_mempcpy:
if (visitMemPCpyCall(I))
return;
break;
case LibFunc_memchr:
if (visitMemChrCall(I))
return;
break;
case LibFunc_strcpy:
if (visitStrCpyCall(I, false))
return;
break;
case LibFunc_stpcpy:
if (visitStrCpyCall(I, true))
return;
break;
case LibFunc_strcmp:
if (visitStrCmpCall(I))
return;
break;
case LibFunc_strlen:
if (visitStrLenCall(I))
return;
break;
case LibFunc_strnlen:
if (visitStrNLenCall(I))
return;
break;
}
}
}
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
// CFGuardTarget bundles are lowered in LowerCallTo.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
LLVMContext::OB_clang_arc_attachedcall}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
// Check if we can potentially perform a tail call. More detailed checking
// is done within LowerCallTo, after more information about the call is
// known.
LowerCallTo(I, Callee, I.isTailCall(), I.isMustTailCall());
}
namespace {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber
/// this is null, otherwise it is the incoming operand to the CallInst.
/// This gets modified as the asm is processed.
SDValue CallOperand;
/// AssignedRegs - If this is a register or register class operand, this
/// contains the set of registers corresponding to the operand.
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
}
/// Whether or not this operand accesses memory
bool hasMemory(const TargetLowering &TLI) const {
// Indirect operand accesses access memory.
if (isIndirect)
return true;
for (const auto &Code : Codes)
if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
return true;
return false;
}
};
} // end anonymous namespace
/// Make sure that the output operand \p OpInfo and its corresponding input
/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
/// out).
static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &MatchingOpInfo,
SelectionDAG &DAG) {
if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
return;
const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
const auto &TLI = DAG.getTargetLoweringInfo();
std::pair<unsigned, const TargetRegisterClass *> MatchRC =
TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
OpInfo.ConstraintVT);
std::pair<unsigned, const TargetRegisterClass *> InputRC =
TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
MatchingOpInfo.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
MatchingOpInfo.ConstraintVT.isInteger()) ||
(MatchRC.second != InputRC.second)) {
// FIXME: error out in a more elegant fashion
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
}
MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
}
/// Get a direct memory input to behave well as an indirect operand.
/// This may introduce stores, hence the need for a \p Chain.
/// \return The (possibly updated) chain.
static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
SDISelAsmOperandInfo &OpInfo,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we don't have an indirect input, put it in the constpool if we can,
// otherwise spill it to a stack slot.
// TODO: This isn't quite right. We need to handle these according to
// the addressing mode that the constraint wants. Also, this may take
// an additional register for the computation and we don't want that
// either.
// If the operand is a float, integer, or vector constant, spill to a
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(
cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
return Chain;
}
// Otherwise, create a stack slot and emit a store to it before the asm.
Type *Ty = OpVal->getType();
auto &DL = DAG.getDataLayout();
uint64_t TySize = DL.getTypeAllocSize(Ty);
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(
TySize, DL.getPrefTypeAlign(Ty), false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
MachinePointerInfo::getFixedStack(MF, SSFI),
TLI.getMemValueType(DL, Ty));
OpInfo.CallOperand = StackSlot;
return Chain;
}
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
/// uses features that we can't model on machineinstrs, we have SDISel do the
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
static llvm::Optional<unsigned>
getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo,
SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// No work to do for memory/address operands.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Address)
return None;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
unsigned AssignedReg;
const TargetRegisterClass *RC;
std::tie(AssignedReg, RC) = TLI.getRegForInlineAsmConstraint(
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
return None;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
// remember that AX is actually i16 to get the right extension.
const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
// If this is an FP operand in an integer register (or vice versa), or more
// generally if the operand value disagrees with the register class we plan
// to stick it in, fix the operand type.
//
// If this is an input value, the bitcast to the new type is done now.
// Bitcast for output value is done at the end of visitInlineAsm().
if ((OpInfo.Type == InlineAsm::isOutput ||
OpInfo.Type == InlineAsm::isInput) &&
!TRI.isTypeLegalForClass(*RC, OpInfo.ConstraintVT)) {
// Try to convert to the first EVT that the reg class contains. If the
// types are identical size, use a bitcast to convert (e.g. two differing
// vector types). Note: output bitcast is done at the end of
// visitInlineAsm().
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
// Exclude indirect inputs while they are unsupported because the code
// to perform the load is missing and thus OpInfo.CallOperand still
// refers to the input address rather than the pointed-to value.
if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
// If the operand is an FP value and we want it in integer registers,
// use the corresponding integer type. This turns an f64 value into
// i64, which can be passed with two i32 values on a 32-bit machine.
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
MVT VT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits());
if (OpInfo.Type == InlineAsm::isInput)
OpInfo.CallOperand =
DAG.getNode(ISD::BITCAST, DL, VT, OpInfo.CallOperand);
OpInfo.ConstraintVT = VT;
}
}
}
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
return None;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
ValueVT = RegVT;
// Initialize NumRegs.
unsigned NumRegs = 1;
if (OpInfo.ConstraintVT != MVT::Other)
NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT);
// If this is a constraint for a specific physical register, like {r17},
// assign it now: initialize the iterator to point at that register. If the
// operand will instead live in virtual registers, make sure we can create
// enough of them.
TargetRegisterClass::iterator I = RC->begin();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
// Do not check for single registers.
if (AssignedReg) {
I = std::find(I, RC->end(), AssignedReg);
if (I == RC->end()) {
// RC does not contain the selected register, which indicates a
// mismatch between the register and the required type/bitwidth.
return {AssignedReg};
}
}
for (; NumRegs; --NumRegs, ++I) {
assert(I != RC->end() && "Ran out of registers to allocate!");
Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
Regs.push_back(R);
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
return None;
}
static unsigned
findMatchingInlineAsmOperand(unsigned OperandNo,
const std::vector<SDValue> &AsmNodeOperands) {
// Scan until we find the definition we already emitted of this operand.
unsigned CurOp = InlineAsm::Op_FirstOperand;
for (; OperandNo; --OperandNo) {
// Advance to the next operand.
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
assert((InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
InlineAsm::isMemKind(OpFlag)) &&
"Skipped past definitions?");
CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
}
return CurOp;
}
namespace {
class ExtraFlags {
unsigned Flags = 0;
public:
explicit ExtraFlags(const CallBase &Call) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
if (IA->hasSideEffects())
Flags |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
Flags |= InlineAsm::Extra_IsAlignStack;
if (Call.isConvergent())
Flags |= InlineAsm::Extra_IsConvergent;
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
void update(const TargetLowering::AsmOperandInfo &OpInfo) {
// Ideally, we would only check against memory constraints. However, the
// meaning of an Other constraint can be target-specific and we can't easily
// reason about it. Therefore, be conservative and set MayLoad/MayStore
// for Other constraints as well.
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
if (OpInfo.Type == InlineAsm::isInput)
Flags |= InlineAsm::Extra_MayLoad;
else if (OpInfo.Type == InlineAsm::isOutput)
Flags |= InlineAsm::Extra_MayStore;
else if (OpInfo.Type == InlineAsm::isClobber)
Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
}
}
unsigned get() const { return Flags; }
};
} // end anonymous namespace
/// visitInlineAsm - Handle a call to an InlineAsm object.
void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
const BasicBlock *EHPadBB) {
const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
/// ConstraintOperands - Information about all of the constraints.
SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call);
// First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack,
// AsmDialect, MayLoad, MayStore).
bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(Call);
for (auto &T : TargetConstraints) {
ConstraintOperands.push_back(SDISelAsmOperandInfo(T));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
if (OpInfo.CallOperandVal)
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
if (!HasSideEffect)
HasSideEffect = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
// FIXME: Could we compute this on OpInfo rather than T?
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
if (T.ConstraintType == TargetLowering::C_Immediate &&
OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
// We've delayed emitting a diagnostic like the "n" constraint because
// inlining could cause an integer showing up.
return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) +
"' expects an integer constant "
"expression");
ExtraInfo.update(T);
}
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow();
if (EmitEHLabels) {
assert(EHPadBB && "InvokeInst must have an EHPadBB");
}
bool IsCallBr = isa<CallBrInst>(Call);
if (IsCallBr || EmitEHLabels) {
// If this is a callbr or invoke we need to flush pending exports since
// inlineasm_br and invoke are terminators.
// We need to do this before nodes are glued to the inlineasm_br node.
Chain = getControlRoot();
}
MCSymbol *BeginLabel = nullptr;
if (EmitEHLabels) {
Chain = lowerStartEH(Chain, EHPadBB, BeginLabel);
}
// Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
// other is floating point, or their sizes are different, flag it as an
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
patchMatchingInput(OpInfo, Input, DAG);
}
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
if ((OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.Type == InlineAsm::isClobber) ||
OpInfo.ConstraintType == TargetLowering::C_Address)
continue;
// If this is a memory input, and if the operand is not indirect, do what we
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
assert((OpInfo.isMultipleAlternative ||
(OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value.
Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
// There is no longer a Value* corresponding to this operand.
OpInfo.CallOperandVal = nullptr;
// It is now an indirect operand.
OpInfo.isIndirect = true;
}
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
std::vector<SDValue> AsmNodeOperands;
AsmNodeOperands.push_back(SDValue()); // reserve space for input chain
AsmNodeOperands.push_back(DAG.getTargetExternalSymbol(
IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout())));
// If we have a !srcloc metadata node associated with it, we want to attach
// this to the ultimately generated inline asm machineinstr. To do this, we
// pass in the third operand as this (potentially null) inline asm MDNode.
const MDNode *SrcLoc = Call.getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
// bits as operand 3.
AsmNodeOperands.push_back(DAG.getTargetConstant(
ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Third pass: Loop over operands to prepare DAG-level operands. As part of
// this, assign virtual and physical registers for inputs and outputs.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// Assign Registers.
SDISelAsmOperandInfo &RefOpInfo =
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
const auto RegError =
getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
if (RegError) {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const char *RegName = TRI.getName(RegError.value());
emitInlineAsmError(Call, "register '" + Twine(RegName) +
"' allocated for constraint '" +
Twine(OpInfo.ConstraintCode) +
"' does not match required type");
return;
}
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
for (unsigned Reg : OpInfo.AssignedRegs.Regs) {
if (Register::isPhysicalRegister(Reg) &&
TRI.isInlineAsmReadOnlyReg(MF, Reg)) {
const char *RegName = TRI.getName(Reg);
emitInlineAsmError(Call, "write to reserved register '" +
Twine(RegName) + "'");
return true;
}
}
return false;
};
assert((OpInfo.ConstraintType != TargetLowering::C_Address ||
(OpInfo.Type == InlineAsm::isInput &&
!OpInfo.isMatchingInputConstraint())) &&
"Only address as input operand is allowed.");
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this output.
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
// C_RegisterClass, and a target-defined fashion for
// C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
Call, "couldn't allocate output register for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
// Add information to the INLINEASM node to know that this register is
// set.
OpInfo.AssignedRegs.AddInlineAsmOperands(
OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
: InlineAsm::Kind_RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
break;
case InlineAsm::isInput:
case InlineAsm::isLabel: {
SDValue InOperandVal = OpInfo.CallOperand;
if (OpInfo.isMatchingInputConstraint()) {
// If this is required to match an output register we have already set,
// just use its register.
auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
AsmNodeOperands);
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
if (InlineAsm::isRegDefKind(OpFlag) ||
InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
if (OpInfo.isIndirect) {
// This happens on gcc/testsuite/gcc.dg/pr8788-1.c
emitInlineAsmError(Call, "inline asm not supported yet: "
"don't know how to handle tied "
"indirect register inputs");
return;
}
SmallVector<unsigned, 4> Regs;
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
const TargetRegisterClass *RC =
TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
: RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
: TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to copy the input value into the
// newly-created virtual registers tied to the matched output.
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
break;
}
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
// Add information to the INLINEASM node to know about this input.
// See InlineAsm.h isUseOperandTiedToDef.
OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag);
OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
OpInfo.getMatchedOperand());
AsmNodeOperands.push_back(DAG.getTargetConstant(
OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
break;
}
// Treat indirect 'X' constraint as memory.
if (OpInfo.ConstraintType == TargetLowering::C_Other &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
if (isa<ConstantSDNode>(InOperandVal)) {
emitInlineAsmError(Call, "value out of range for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
emitInlineAsmError(Call,
"invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
AsmNodeOperands.push_back(DAG.getTargetConstant(
ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
llvm::append_range(AsmNodeOperands, Ops);
break;
}
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
OpInfo.ConstraintType == TargetLowering::C_Address) {
assert((OpInfo.isIndirect ||
OpInfo.ConstraintType != TargetLowering::C_Memory) &&
"Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() ==
TLI.getPointerTy(DAG.getDataLayout()) &&
"Memory operands expect pointer values");
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
"Failed to convert memory constraint code to constraint id.");
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID);
AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
getCurSDLoc(),
MVT::i32));
AsmNodeOperands.push_back(InOperandVal);
break;
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
OpInfo.ConstraintType == TargetLowering::C_Register) &&
"Unknown constraint type!");
// TODO: Support this.
if (OpInfo.isIndirect) {
emitInlineAsmError(
Call, "Don't know how to handle indirect register inputs yet "
"for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(Call,
"couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
}
if (DetectWriteToReservedRegister())
return;
SDLoc dl = getCurSDLoc();
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
dl, DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
false, 0, getCurSDLoc(), DAG,
AsmNodeOperands);
break;
}
}
// Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
// Do additional work to generate outputs.
SmallVector<EVT, 1> ResultVTs;
SmallVector<SDValue, 1> ResultValues;
SmallVector<SDValue, 8> OutChains;
llvm::Type *CallResultType = Call.getType();
ArrayRef<Type *> ResultTypes;
if (StructType *StructResult = dyn_cast<StructType>(CallResultType))
ResultTypes = StructResult->elements();
else if (!CallResultType->isVoidTy())
ResultTypes = makeArrayRef(CallResultType);
auto CurResultType = ResultTypes.begin();
auto handleRegAssign = [&](SDValue V) {
assert(CurResultType != ResultTypes.end() && "Unexpected value");
assert((*CurResultType)->isSized() && "Unexpected unsized type");
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
++CurResultType;
// If the type of the inline asm call site return value is different from,
// but has the same size as, the type of the asm output, bitcast it. One
// example of this is vectors with a different width / number of elements.
// This can happen for register classes that can contain multiple different
// value types: the preg or vreg allocated may not have the same VT as was
// expected.
//
// This can also happen for a return value that disagrees with the register
// class it is put in, e.g. a double in a general-purpose register on a
// 32-bit machine.
if (ResultVT != V.getValueType() &&
ResultVT.getSizeInBits() == V.getValueSizeInBits())
V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
V.getValueType().isInteger()) {
// If a result value was tied to an input value, the computed result
// may have a wider width than the expected result. Extract the
// relevant portion.
V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
}
assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
ResultVTs.push_back(ResultVT);
ResultValues.push_back(V);
};
// Deal with output operands.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
if (OpInfo.Type == InlineAsm::isOutput) {
SDValue Val;
// Skip trivial output operands.
if (OpInfo.AssignedRegs.Regs.empty())
continue;
switch (OpInfo.ConstraintType) {
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
Chain, &Flag, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
break;
case TargetLowering::C_Memory:
break; // Already handled.
case TargetLowering::C_Address:
break; // Silence warning.
case TargetLowering::C_Unknown:
assert(false && "Unexpected unknown constraint");
}
// Indirect outputs manifest as stores. Record the output chains.
if (OpInfo.isIndirect) {
const Value *Ptr = OpInfo.CallOperandVal;
assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
MachinePointerInfo(Ptr));
OutChains.push_back(Store);
} else {
// Generate CopyFromRegs to the associated registers.
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
if (Val.getOpcode() == ISD::MERGE_VALUES) {
for (const SDValue &V : Val->op_values())
handleRegAssign(V);
} else
handleRegAssign(Val);
}
}
}
// Set results.
if (!ResultValues.empty()) {
assert(CurResultType == ResultTypes.end() &&
"Mismatch in number of ResultTypes");
assert(ResultValues.size() == ResultTypes.size() &&
"Mismatch in number of output operands in asm result");
SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ResultVTs), ResultValues);
setValue(&Call, V);
}
// Collect store chains.
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
if (EmitEHLabels) {
Chain = lowerEndEH(Chain, cast<InvokeInst>(&Call), EHPadBB, BeginLabel);
}
// Only update the root if the inline assembly has a memory effect.
if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr ||
EmitEHLabels)
DAG.setRoot(Chain);
}
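// Illustrative example of the inline asm constraint lowering above (made-up
// IR, not part of this change):
//   %sum = call i32 asm "add $0, $1, $2", "=r,r,r"(i32 %a, i32 %b)
// "=r" becomes a Kind_RegDef output operand and each "r" a Kind_RegUse
// input; a matching constraint such as "=r,0" would instead tie the input
// to the output's registers with a Kind_RegUse marked as matching.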
void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call,
const Twine &Message) {
LLVMContext &Ctx = *DAG.getContext();
Ctx.emitError(&Call, Message);
// Make sure we leave the DAG in a valid state
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs);
if (ValueVTs.empty())
return;
SmallVector<SDValue, 1> Ops;
for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ValueVTs[i]));
setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc()));
}
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
SDValue V = DAG.getVAArg(
TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
DL.getABITypeAlign(I.getType()).value());
DAG.setRoot(V.getValue(1));
if (I.getType()->isPointerTy())
V = DAG.getPtrExtOrTrunc(
V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
setValue(&I, V);
}
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(),
MVT::Other, getRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
DAG.getSrcValue(I.getArgOperand(0)),
DAG.getSrcValue(I.getArgOperand(1))));
}
SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
const Instruction &I,
SDValue Op) {
const MDNode *Range = I.getMetadata(LLVMContext::MD_range);
if (!Range)
return Op;
ConstantRange CR = getConstantRangeFromMetadata(*Range);
if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
return Op;
APInt Lo = CR.getUnsignedMin();
if (!Lo.isMinValue())
return Op;
APInt Hi = CR.getUnsignedMax();
unsigned Bits = std::max(Hi.getActiveBits(),
static_cast<unsigned>(IntegerType::MIN_INT_BITS));
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDLoc SL = getCurSDLoc();
SDValue ZExt = DAG.getNode(ISD::AssertZext, SL, Op.getValueType(), Op,
DAG.getValueType(SmallVT));
unsigned NumVals = Op.getNode()->getNumValues();
if (NumVals == 1)
return ZExt;
SmallVector<SDValue, 4> Ops;
Ops.push_back(ZExt);
for (unsigned I = 1; I != NumVals; ++I)
Ops.push_back(Op.getValue(I));
return DAG.getMergeValues(Ops, SL);
}
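// Illustrative effect of lowerRangeToAssertZExt above (made-up IR): given
//   %v = call i32 @f(), !range !0    ; !0 = !{i32 0, i32 256}
// the range [0, 256) starts at zero and its unsigned max (255) fits in 8
// bits, so %v is wrapped in (AssertZext i32 %v, i8), letting later combines
// remove redundant zero-extensions.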
/// Populate a CallLoweringInfo (into \p CLI) based on the properties of
/// the call being lowered.
///
/// This is a helper for lowering intrinsics that follow a target calling
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
Args.reserve(NumArgs);
// Populate the argument list.
// Attributes for args start at offset 1, after the return attribute.
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
const Value *V = Call->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
Entry.setAttributes(Call, ArgI);
Args.push_back(Entry);
}
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
.setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
.setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint)
.setIsPreallocated(
Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
}
/// Add a stack map intrinsic call's live variable operands to a stackmap
/// or patchpoint target node's operand list.
///
/// Constants are converted to TargetConstants purely as an optimization to
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
/// alloca in the entry block, then the runtime may assume that the alloca's
/// StackMap location can be read immediately after compilation and that the
/// location is valid at any point during execution (this is similar to the
/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
/// only available in a register, then the runtime would need to trap when
/// execution reaches the StackMap in order to read the alloca's location.
static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx,
const SDLoc &DL, SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
SelectionDAG &DAG = Builder.DAG;
for (unsigned I = StartIdx; I < Call.arg_size(); I++) {
SDValue Op = Builder.getValue(Call.getArgOperand(I));
// Things on the stack are pointer-typed, meaning that they are already
// legal and can be emitted directly to target nodes.
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType()));
} else {
// Otherwise emit a target independent node to be legalised.
Ops.push_back(Builder.getValue(Call.getArgOperand(I)));
}
}
}
/// Lower llvm.experimental.stackmap.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
// [live variables...])
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
SDValue Chain, InFlag, Callee, NullPtr;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
Callee = getValue(CI.getCalledOperand());
NullPtr = DAG.getIntPtrConstant(0, DL, true);
// The stackmap intrinsic only records the live variables (the arguments
// passed to it) and emits NOPs (if requested). Unlike the patchpoint
// intrinsic, this won't be lowered to a function call. This means we don't
// have to worry about calling conventions and target specific lowering code.
// Instead we perform the call lowering right here.
//
// chain, flag = CALLSEQ_START(chain, 0, 0)
// chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
InFlag = Chain.getValue(1);
// Add the STACKMAP operands, starting with DAG house-keeping.
Ops.push_back(Chain);
Ops.push_back(InFlag);
// Add the <id>, <numShadowBytes> operands.
//
// These do not require legalisation, and can be emitted directly to target
// constant nodes.
SDValue ID = getValue(CI.getArgOperand(0));
assert(ID.getValueType() == MVT::i64);
SDValue IDConst = DAG.getTargetConstant(
cast<ConstantSDNode>(ID)->getZExtValue(), DL, ID.getValueType());
Ops.push_back(IDConst);
SDValue Shad = getValue(CI.getArgOperand(1));
assert(Shad.getValueType() == MVT::i32);
SDValue ShadConst = DAG.getTargetConstant(
cast<ConstantSDNode>(Shad)->getZExtValue(), DL, Shad.getValueType());
Ops.push_back(ShadConst);
// Add the live variables.
addStackMapLiveVars(CI, 2, DL, Ops, *this);
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
// Set the root to the target-lowered call chain.
DAG.setRoot(Chain);
// Inform the Frame Information that we have a stackmap in this function.
FuncInfo.MF->getFrameInfo().setHasStackMap();
}
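// Illustrative call site for visitStackmap above (operand values are made
// up):
//   call void (i64, i32, ...) @llvm.experimental.stackmap(i64 42, i32 8,
//                                                         i64 %obj)
// <id> = 42 and <numShadowBytes> = 8 become TargetConstants, and %obj is
// recorded as a live variable via addStackMapLiveVars.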
/// Lower llvm.experimental.patchpoint directly to its target opcode.
void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB,
const BasicBlock *EHPadBB) {
// void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
// i32 <numBytes>,
// i8* <target>,
// i32 <numArgs>,
// [Args...],
// [live variables...])
CallingConv::ID CC = CB.getCallingConv();
bool IsAnyRegCC = CC == CallingConv::AnyReg;
bool HasDef = !CB.getType()->isVoidTy();
SDLoc dl = getCurSDLoc();
SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos));
// Handle immediate and symbolic callees.
if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee))
Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl,
/*isTarget=*/true);
else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(),
SDLoc(SymbolicCallee),
SymbolicCallee->getValueType(0));
// Get the real number of arguments participating in the call <numArgs>
SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos));
unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue();
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// Intrinsics include all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
assert(CB.arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs;
Type *ReturnTy =
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType();
TargetLowering::CallLoweringInfo CLI(DAG);
populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee,
ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg))
CallEnd = CallEnd->getOperand(0).getNode();
/// Get a call instruction from the call sequence chain.
/// Tail calls are not allowed.
assert(CallEnd->getOpcode() == ISD::CALLSEQ_END &&
"Expected a callseq node.");
SDNode *Call = CallEnd->getOperand(0).getNode();
bool HasGlue = Call->getGluedNode();
// Replace the target specific call node with the patchable intrinsic.
SmallVector<SDValue, 8> Ops;
// Push the chain.
Ops.push_back(*(Call->op_begin()));
// Optionally, push the glue (if any).
if (HasGlue)
Ops.push_back(*(Call->op_end() - 1));
// Push the register mask info.
if (HasGlue)
Ops.push_back(*(Call->op_end() - 2));
else
Ops.push_back(*(Call->op_end() - 1));
// Add the <id> and <numBytes> constants.
SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64));
SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos));
Ops.push_back(DAG.getTargetConstant(
cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl,
MVT::i32));
// Add the callee.
Ops.push_back(Callee);
// Adjust <numArgs> to account for any arguments that have been passed on the
// stack instead.
// Call Node: Chain, Target, {Args}, RegMask, [Glue]
unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3);
NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs;
Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32));
// Add the calling convention
Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32));
// Add the arguments we omitted previously. The register allocator should
// place these in any free register.
if (IsAnyRegCC)
for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i)
Ops.push_back(getValue(CB.getArgOperand(i)));
// Push the arguments from the call instruction.
SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1;
Ops.append(Call->op_begin() + 2, e);
// Push live variables for the stack map.
addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this);
SDVTList NodeTys;
if (IsAnyRegCC && HasDef) {
// Create the return types based on the intrinsic definition
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SmallVector<EVT, 3> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs);
assert(ValueVTs.size() == 1 && "Expected only one return value type.");
// There is always a chain and a glue type at the end
ValueVTs.push_back(MVT::Other);
ValueVTs.push_back(MVT::Glue);
NodeTys = DAG.getVTList(ValueVTs);
} else
NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// Replace the target specific call node with a PATCHPOINT node.
SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops);
// Update the NodeMap.
if (HasDef) {
if (IsAnyRegCC)
setValue(&CB, SDValue(PPV.getNode(), 0));
else
setValue(&CB, Result.first);
}
// Fixup the consumers of the intrinsic. The chain and glue may be used in the
// call sequence. Furthermore the location of the chain and glue can change
// when the AnyReg calling convention is used and the intrinsic returns a
// value.
if (IsAnyRegCC && HasDef) {
SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)};
SDValue To[] = {PPV.getValue(1), PPV.getValue(2)};
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
} else
DAG.ReplaceAllUsesWith(Call, PPV.getNode());
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
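// Illustrative call site for visitPatchpoint above (operand values are made
// up):
//   %r = call i64 (i64, i32, i8*, i32, ...)
//            @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %target,
//                                              i32 2, i64 %a, i64 %b)
// This reserves 15 bytes of encoding space at ID 7 and calls %target with
// two arguments; any further operands would be recorded as stack map live
// variables.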
void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
if (I.arg_size() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDValue Res;
SDNodeFlags SDFlags;
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
SDFlags.copyFMF(*FPMO);
switch (Intrinsic) {
case Intrinsic::vector_reduce_fadd:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FADD, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_fmul:
if (SDFlags.hasAllowReassociation())
Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2, SDFlags),
SDFlags);
else
Res = DAG.getNode(ISD::VECREDUCE_SEQ_FMUL, dl, VT, Op1, Op2, SDFlags);
break;
case Intrinsic::vector_reduce_add:
Res = DAG.getNode(ISD::VECREDUCE_ADD, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_mul:
Res = DAG.getNode(ISD::VECREDUCE_MUL, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_and:
Res = DAG.getNode(ISD::VECREDUCE_AND, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_or:
Res = DAG.getNode(ISD::VECREDUCE_OR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_xor:
Res = DAG.getNode(ISD::VECREDUCE_XOR, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smax:
Res = DAG.getNode(ISD::VECREDUCE_SMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_smin:
Res = DAG.getNode(ISD::VECREDUCE_SMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umax:
Res = DAG.getNode(ISD::VECREDUCE_UMAX, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
case Intrinsic::vector_reduce_fmax:
Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
setValue(&I, Res);
}
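// Illustrative mapping for the fadd case in visitVectorReduce above
// (made-up IR): with the 'reassoc' fast-math flag,
//   %r = call reassoc float @llvm.vector.reduce.fadd.v4f32(float %acc,
//                                                          <4 x float> %v)
// lowers to FADD(%acc, VECREDUCE_FADD(%v)); without 'reassoc' the strict
// ordering is kept and it lowers to VECREDUCE_SEQ_FADD(%acc, %v).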
/// Returns an AttributeList representing the attributes applied to the return
/// value of the given call.
static AttributeList getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) {
SmallVector<Attribute::AttrKind, 2> Attrs;
if (CLI.RetSExt)
Attrs.push_back(Attribute::SExt);
if (CLI.RetZExt)
Attrs.push_back(Attribute::ZExt);
if (CLI.IsInReg)
Attrs.push_back(Attribute::InReg);
return AttributeList::get(CLI.RetTy->getContext(), AttributeList::ReturnIndex,
Attrs);
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
/// implementation, which just calls LowerCall.
/// FIXME: When all targets are
/// migrated to using LowerCall, this hook should be integrated into SDISel.
std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
Type *OrigRetTy = CLI.RetTy;
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
SmallVector<EVT, 4> OldRetTys;
SmallVector<uint64_t, 4> OldOffsets;
RetTys.swap(OldRetTys);
Offsets.swap(OldOffsets);
for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
EVT RetVT = OldRetTys[i];
uint64_t Offset = OldOffsets[i];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
Offsets.push_back(Offset + j * RegisterVTByteSZ);
}
}
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CLI.CallConv, CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL);
bool CanLowerReturn =
this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(),
CLI.IsVarArg, Outs, CLI.RetTy->getContext());
SDValue DemoteStackSlot;
int DemoteStackIdx = -100;
if (!CanLowerReturn) {
// FIXME: equivalent assert?
// assert(!CS.hasInAllocaArgument() &&
// "sret demotion is incompatible with inalloca");
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
Align Alignment = DL.getPrefTypeAlign(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
DemoteStackIdx =
MF.getFrameInfo().CreateStackObject(TySize, Alignment, false);
Type *StackSlotPtrType = PointerType::get(CLI.RetTy,
DL.getAllocaAddrSpace());
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL));
ArgListEntry Entry;
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.IsSExt = false;
Entry.IsZExt = false;
Entry.IsInReg = false;
Entry.IsSRet = true;
Entry.IsNest = false;
Entry.IsByVal = false;
Entry.IsByRef = false;
Entry.IsReturned = false;
Entry.IsSwiftSelf = false;
Entry.IsSwiftAsync = false;
Entry.IsSwiftError = false;
Entry.IsCFGuardTarget = false;
Entry.Alignment = Alignment;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
CLI.NumFixedArgs += 1;
CLI.getArgs()[0].IndirectType = CLI.RetTy;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
CLI.RetTy, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
ISD::ArgFlagsTy Flags;
if (NeedsRegBlock) {
Flags.setInConsecutiveRegs();
if (I == RetTys.size() - 1)
Flags.setInConsecutiveRegsLast();
}
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.Flags = Flags;
MyFlags.VT = RegisterVT;
MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
if (CLI.RetTy->isPointerTy()) {
MyFlags.Flags.setPointer();
MyFlags.Flags.setPointerAddrSpace(
cast<PointerType>(CLI.RetTy)->getAddressSpace());
}
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
if (CLI.RetZExt)
MyFlags.Flags.setZExt();
if (CLI.IsInReg)
MyFlags.Flags.setInReg();
CLI.Ins.push_back(MyFlags);
}
}
}
// We push in swifterror return as the last element of CLI.Ins.
ArgListTy &Args = CLI.getArgs();
if (supportSwiftError()) {
for (const ArgListEntry &Arg : Args) {
if (Arg.IsSwiftError) {
ISD::InputArg MyFlags;
MyFlags.VT = getPointerTy(DL);
MyFlags.ArgVT = EVT(getPointerTy(DL));
MyFlags.Flags.setSwiftError();
CLI.Ins.push_back(MyFlags);
}
}
}
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
// FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
FinalType = Args[i].IndirectType;
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg, DL);
for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Args[i].Ty)->getAddressSpace());
}
if (Args[i].IsZExt)
Flags.setZExt();
if (Args[i].IsSExt)
Flags.setSExt();
if (Args[i].IsInReg) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (homogeneous vector aggregate).
if (CLI.CallConv == CallingConv::X86_VectorCall &&
isa<StructType>(FinalType)) {
// The first value of a structure is marked as the HVA start.
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Args[i].IsSRet)
Flags.setSRet();
if (Args[i].IsSwiftSelf)
Flags.setSwiftSelf();
if (Args[i].IsSwiftAsync)
Flags.setSwiftAsync();
if (Args[i].IsSwiftError)
Flags.setSwiftError();
if (Args[i].IsCFGuardTarget)
Flags.setCFGuardTarget();
if (Args[i].IsByVal)
Flags.setByVal();
if (Args[i].IsByRef)
Flags.setByRef();
if (Args[i].IsPreallocated) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
if (Args[i].IsInAlloca) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
Align MemAlign;
if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
unsigned FrameSize = DL.getTypeAllocSize(Args[i].IndirectType);
Flags.setByValSize(FrameSize);
// For ByVal arguments, size and alignment should be passed from the FE; the
// BE will guess if this info is not there, but there are cases it cannot
// get right.
if (auto MA = Args[i].Alignment)
MemAlign = *MA;
else
MemAlign = Align(getByValTypeAlignment(Args[i].IndirectType, DL));
} else if (auto MA = Args[i].Alignment) {
MemAlign = *MA;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Args[i].IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumParts = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (Args[i].IsSExt)
ExtendKind = ISD::SIGN_EXTEND;
else if (Args[i].IsZExt)
ExtendKind = ISD::ZERO_EXTEND;
// Conservatively only handle 'returned' on non-vectors that can be lowered,
// for now.
if (Args[i].IsReturned && !Op.getValueType().isVector() &&
CanLowerReturn) {
assert((CLI.RetTy == Args[i].Ty ||
(CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
CLI.RetTy->getPointerAddressSpace() ==
Args[i].Ty->getPointerAddressSpace())) &&
RetTys.size() == NumValues && "unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
// either the register MVT and the actual EVT are the same size or that
// the return value and argument are extended in the same way; in these
// cases it's safe to pass the argument register value unchanged as the
// return register value (although it's at the target's option whether
// to do so)
// TODO: allow code generation to take advantage of partially preserved
// registers rather than clobbering the entire register when the
// parameter extension method is not compatible with the return
// extension method
if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) ||
(ExtendKind != ISD::ANY_EXTEND && CLI.RetSExt == Args[i].IsSExt &&
CLI.RetZExt == Args[i].IsZExt))
Flags.setReturned();
}
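// Illustrative use of 'returned' (made-up declaration): in
//   declare i8* @memset(i8* returned, i32, i64)
// the first argument is also the return value, so when the size or
// extension checks above hold, the argument register may be reused
// unchanged as the return register.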
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB,
CLI.CallConv, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1.
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(
Flags, Parts[j].getValueType().getSimpleVT(), VT,
i < CLI.NumFixedArgs, i,
j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
if (NeedsRegBlock && Value == NumValues - 1)
CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
}
}
SmallVector<SDValue, 4> InVals;
CLI.Chain = LowerCall(CLI, InVals);
// Update CLI.InVals to use outside of this function.
CLI.InVals = InVals;
// Verify that the target's LowerCall behaved as expected.
assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
"LowerCall didn't return a valid chain!");
assert((!CLI.IsTailCall || InVals.empty()) &&
"LowerCall emitted a return value for a tail call!");
assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
"LowerCall didn't emit the correct number of values!");
// For a tail call, the return value is merely live-out and there aren't
// any nodes in the DAG representing it. Return a special value to
// indicate that a tail call has been emitted and no more Instructions
// should be processed in the current block.
if (CLI.IsTailCall) {
CLI.DAG.setRoot(CLI.Chain);
return std::make_pair(SDValue(), SDValue());
}
#ifndef NDEBUG
for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() && "LowerCall emitted a null value!");
assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
}
#endif
SmallVector<SDValue, 4> ReturnValues;
if (!CanLowerReturn) {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
unsigned NumValues = RetTys.size();
ReturnValues.resize(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MachineFunction &MF = CLI.DAG.getMachineFunction();
Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx);
for (unsigned i = 0; i < NumValues; ++i) {
SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot,
CLI.DAG.getConstant(Offsets[i], CLI.DL,
PtrVT), Flags);
SDValue L = CLI.DAG.getLoad(
RetTys[i], CLI.DL, CLI.Chain, Add,
MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(),
DemoteStackIdx, Offsets[i]),
HiddenSRetAlign);
ReturnValues[i] = L;
Chains[i] = L.getValue(1);
}
CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains);
} else {
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
Optional<ISD::NodeType> AssertOp;
if (CLI.RetSExt)
AssertOp = ISD::AssertSext;
else if (CLI.RetZExt)
AssertOp = ISD::AssertZext;
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
CLI.CallConv, AssertOp));
CurReg += NumRegs;
}
// For a function returning void, there is no return value. We can't create
// such a node, so we just return a null return value in that case; nothing
// will actually look at the value.
if (ReturnValues.empty())
return std::make_pair(SDValue(), CLI.Chain);
}
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
CLI.DAG.getVTList(RetTys), ReturnValues);
return std::make_pair(Res, CLI.Chain);
}
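// Illustrative sketch of the sret demotion performed by LowerCallTo, at the
// C level (assuming a target that cannot return struct S in registers):
//   struct S f(int x);             // original signature
//   void f(struct S *sret, int x); // demoted: the callee stores through
//                                  // the hidden pointer and the caller
//                                  // reloads the parts from DemoteStackSlot.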
/// Places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
void TargetLowering::LowerOperationWrapper(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
SDValue Res = LowerOperation(SDValue(N, 0), DAG);
if (!Res.getNode())
return;
// If the original node has one result, take the return value from
// LowerOperation as is. It might not be result number 0.
if (N->getNumValues() == 1) {
Results.push_back(Res);
return;
}
// If the original node has multiple results, then the return node should
// have the same number of results.
assert((N->getNumValues() == Res->getNumValues()) &&
"Lowering returned the wrong number of results!");
// Place the new result values based on N's result numbers.
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
Results.push_back(Res.getValue(I));
}
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
}
void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V,
unsigned Reg,
ISD::NodeType ExtendType) {
SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
// notional registers required by the type.
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(),
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
if (ExtendType == ISD::ANY_EXTEND) {
auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
ExtendType = PreferredExtendIt->second;
}
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
#include "llvm/CodeGen/SelectionDAGISel.h"
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
if (FastISel)
return A->use_empty();
const BasicBlock &Entry = A->getParent()->front();
for (const User *U : A->users())
if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
return true;
}
using ArgCopyElisionMapTy =
DenseMap<const Argument *,
std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
/// ArgCopyElisionCandidates.
static void
findArgumentCopyElisionCandidates(const DataLayout &DL,
FunctionLoweringInfo *FuncInfo,
ArgCopyElisionMapTy &ArgCopyElisionCandidates) {
// Record the state of every static alloca used in the entry block. Argument
// allocas are all used in the entry block, so we need approximately as many
// entries as we have arguments.
enum StaticAllocaInfo { Unknown, Clobbered, Elidable };
SmallDenseMap<const AllocaInst *, StaticAllocaInfo, 8> StaticAllocas;
unsigned NumArgs = FuncInfo->Fn->arg_size();
StaticAllocas.reserve(NumArgs * 2);
auto GetInfoIfStaticAlloca = [&](const Value *V) -> StaticAllocaInfo * {
if (!V)
return nullptr;
V = V->stripPointerCasts();
const auto *AI = dyn_cast<AllocaInst>(V);
if (!AI || !AI->isStaticAlloca() || !FuncInfo->StaticAllocaMap.count(AI))
return nullptr;
auto Iter = StaticAllocas.insert({AI, Unknown});
return &Iter.first->second;
};
// Look for stores of arguments to static allocas. Look through bitcasts and
// GEPs to handle type coercions, as long as the alloca is fully initialized
// by the store. Any non-store use of an alloca escapes it and any subsequent
// unanalyzed store might write it.
// FIXME: Handle structs initialized with multiple stores.
for (const Instruction &I : FuncInfo->Fn->getEntryBlock()) {
// Look for stores, and handle non-store uses conservatively.
const auto *SI = dyn_cast<StoreInst>(&I);
if (!SI) {
// We will look through cast uses, so ignore them completely.
if (I.isCast())
continue;
// Ignore debug info and pseudo op intrinsics, they don't escape or store
// to allocas.
if (I.isDebugOrPseudoInst())
continue;
// This is an unknown instruction. Assume it escapes or writes to all
// static alloca operands.
for (const Use &U : I.operands()) {
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(U))
*Info = StaticAllocaInfo::Clobbered;
}
continue;
}
// If the stored value is a static alloca, mark it as escaped.
if (StaticAllocaInfo *Info = GetInfoIfStaticAlloca(SI->getValueOperand()))
*Info = StaticAllocaInfo::Clobbered;
// Check if the destination is a static alloca.
const Value *Dst = SI->getPointerOperand()->stripPointerCasts();
StaticAllocaInfo *Info = GetInfoIfStaticAlloca(Dst);
if (!Info)
continue;
const AllocaInst *AI = cast<AllocaInst>(Dst);
// Skip allocas that have been initialized or clobbered.
if (*Info != StaticAllocaInfo::Unknown)
continue;
// Check if the stored value is an argument, and that this store fully
// initializes the alloca.
// If the argument type has padding bits we can't directly forward a pointer
// as the upper bits may contain garbage.
// Don't elide copies from the same argument twice.
const Value *Val = SI->getValueOperand()->stripPointerCasts();
const auto *Arg = dyn_cast<Argument>(Val);
if (!Arg || Arg->hasPassPointeeByValueCopyAttr() ||
Arg->getType()->isEmptyTy() ||
DL.getTypeStoreSize(Arg->getType()) !=
DL.getTypeAllocSize(AI->getAllocatedType()) ||
!DL.typeSizeEqualsStoreSize(Arg->getType()) ||
ArgCopyElisionCandidates.count(Arg)) {
*Info = StaticAllocaInfo::Clobbered;
continue;
}
LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI
<< '\n');
// Mark this alloca and store for argument copy elision.
*Info = StaticAllocaInfo::Elidable;
ArgCopyElisionCandidates.insert({Arg, {AI, SI}});
// Stop scanning if we've seen all arguments. This will happen early in -O0
// builds, which is useful, because -O0 builds have large entry blocks and
// many allocas.
if (ArgCopyElisionCandidates.size() == NumArgs)
break;
}
}
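// Illustrative entry-block pattern that qualifies for elision (made-up IR):
//   define void @f(i64 %x) {
//   entry:
//     %x.addr = alloca i64
//     store i64 %x, i64* %x.addr
// The single store fully initializes the alloca, so if %x arrives in a
// fixed stack object the alloca can reuse it instead of copying.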
/// Try to elide argument copies from memory into a local alloca. Succeeds if
/// ArgVal is a load from a suitable fixed stack object.
static void tryToElideArgumentCopy(
FunctionLoweringInfo &FuncInfo, SmallVectorImpl<SDValue> &Chains,
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
SDValue ArgVal, bool &ArgHasUses) {
// Check if this is a load from a fixed stack object.
auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
if (!LNode)
return;
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
if (!FINode)
return;
// Check that the fixed stack object is the right size and alignment.
// Look at the alignment that the user wrote on the alloca instead of looking
// at the stack object.
auto ArgCopyIter = ArgCopyElisionCandidates.find(&Arg);
assert(ArgCopyIter != ArgCopyElisionCandidates.end());
const AllocaInst *AI = ArgCopyIter->second.first;
int FixedIndex = FINode->getIndex();
int &AllocaIndex = FuncInfo.StaticAllocaMap[AI];
int OldIndex = AllocaIndex;
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) {
LLVM_DEBUG(
dbgs() << " argument copy elision failed due to bad fixed stack "
"object size\n");
return;
}
Align RequiredAlignment = AI->getAlign();
if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) {
LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca "
"greater than stack argument alignment ("
<< DebugStr(RequiredAlignment) << " vs "
<< DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n");
return;
}
// Perform the elision. Delete the old stack object and replace its only use
// in the variable info map. Mark the stack object as mutable.
LLVM_DEBUG({
dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n'
<< " Replacing frame index " << OldIndex << " with " << FixedIndex
<< '\n';
});
MFI.RemoveStackObject(OldIndex);
MFI.setIsImmutableObjectIndex(FixedIndex, false);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
Chains.push_back(ArgVal.getValue(1));
// Avoid emitting code for the store implementing the copy.
const StoreInst *SI = ArgCopyIter->second.second;
ElidedArgCopyInstrs.insert(SI);
// Check for uses of the argument again so that we can avoid exporting ArgVal
// if it isn't used by anything other than the store.
for (const Value *U : Arg.users()) {
if (U != SI) {
ArgHasUses = true;
break;
}
}
}
void SelectionDAGISel::LowerArguments(const Function &F) {
SelectionDAG &DAG = SDB->DAG;
SDLoc dl = SDB->getCurSDLoc();
const DataLayout &DL = DAG.getDataLayout();
SmallVector<ISD::InputArg, 16> Ins;
// In Naked functions we aren't going to save any registers.
if (F.hasFnAttribute(Attribute::Naked))
return;
if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
ISD::ArgFlagsTy Flags;
Flags.setSRet();
MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]);
ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true,
ISD::InputArg::NoArgIndex, 0);
Ins.push_back(RetArg);
}
// Look for stores of arguments to static allocas. Mark such arguments with a
// flag to ask the target to give us the memory location of that argument if
// available.
ArgCopyElisionMapTy ArgCopyElisionCandidates;
findArgumentCopyElisionCandidates(DL, FuncInfo.get(),
ArgCopyElisionCandidates);
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
unsigned ArgNo = Arg.getArgNo();
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
if (Arg.hasAttribute(Attribute::ByVal))
FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg(), DL);
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
Flags.setPointerAddrSpace(
cast<PointerType>(Arg.getType())->getAddressSpace());
}
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
Flags.setSExt();
if (Arg.hasAttribute(Attribute::InReg)) {
// If we are using the vectorcall calling convention, a structure that is
// passed InReg is surely an HVA (homogeneous vector aggregate).
if (F.getCallingConv() == CallingConv::X86_VectorCall &&
isa<StructType>(Arg.getType())) {
// The first value of a structure is marked as the HVA start.
if (0 == Value)
Flags.setHvaStart();
Flags.setHva();
}
// Set InReg Flag
Flags.setInReg();
}
if (Arg.hasAttribute(Attribute::StructRet))
Flags.setSRet();
if (Arg.hasAttribute(Attribute::SwiftSelf))
Flags.setSwiftSelf();
if (Arg.hasAttribute(Attribute::SwiftAsync))
Flags.setSwiftAsync();
if (Arg.hasAttribute(Attribute::SwiftError))
Flags.setSwiftError();
if (Arg.hasAttribute(Attribute::ByVal))
Flags.setByVal();
if (Arg.hasAttribute(Attribute::ByRef))
Flags.setByRef();
if (Arg.hasAttribute(Attribute::InAlloca)) {
Flags.setInAlloca();
// Set the byval flag for CCAssignFn callbacks that don't know about
// inalloca. This way we can know how many bytes we should've allocated
// and how many bytes a callee cleanup function will pop. If we port
// inalloca to more targets, we'll have to add custom inalloca handling
// in the various CC lowering callbacks.
Flags.setByVal();
}
if (Arg.hasAttribute(Attribute::Preallocated)) {
Flags.setPreallocated();
// Set the byval flag for CCAssignFn callbacks that don't know about
// preallocated. This way we can know how many bytes we should've
// allocated and how many bytes a callee cleanup function will pop. If
// we port preallocated to more targets, we'll have to add custom
// preallocated handling in the various CC lowering callbacks.
Flags.setByVal();
}
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
const Align OriginalAlignment(
TLI->getABIAlignmentForCallingConv(ArgTy, DL));
Flags.setOrigAlign(OriginalAlignment);
Align MemAlign;
Type *ArgMemTy = nullptr;
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated() ||
Flags.isByRef()) {
if (!ArgMemTy)
ArgMemTy = Arg.getPointeeInMemoryValueType();
uint64_t MemSize = DL.getTypeAllocSize(ArgMemTy);
// For in-memory arguments, size and alignment should be passed from FE.
// BE will guess if this info is not there but there are cases it cannot
// get right.
if (auto ParamAlign = Arg.getParamStackAlign())
MemAlign = *ParamAlign;
else if ((ParamAlign = Arg.getParamAlign()))
MemAlign = *ParamAlign;
else
MemAlign = Align(TLI->getByValTypeAlignment(ArgMemTy, DL));
if (Flags.isByRef())
Flags.setByRefSize(MemSize);
else
Flags.setByValSize(MemSize);
} else if (auto ParamAlign = Arg.getParamStackAlign()) {
MemAlign = *ParamAlign;
} else {
MemAlign = OriginalAlignment;
}
Flags.setMemAlign(MemAlign);
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
if (Arg.hasAttribute(Attribute::Returned))
Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
unsigned NumRegs = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
// For scalable vectors, use the minimum size; individual targets
// are responsible for handling scalable vector arguments and
// return values.
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1.
else if (i > 0) {
MyFlags.Flags.setOrigAlign(Align(1));
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
Ins.push_back(MyFlags);
}
if (NeedsRegBlock && Value == NumValues - 1)
Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
PartBase += VT.getStoreSize().getKnownMinSize();
}
}
// Call the target to set up the argument values.
SmallVector<SDValue, 8> InVals;
SDValue NewRoot = TLI->LowerFormalArguments(
DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals);
// Verify that the target's LowerFormalArguments behaved as expected.
assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
"LowerFormalArguments didn't return a valid chain!");
assert(InVals.size() == Ins.size() &&
"LowerFormalArguments didn't emit the correct number of values!");
LLVM_DEBUG({
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
// Update the DAG with the new chain value resulting from argument lowering.
DAG.setRoot(NewRoot);
// Set up the argument values.
unsigned i = 0;
if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
F.getReturnType()->getPointerTo(
DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
nullptr, F.getCallingConv(), AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
Register SRetReg =
RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
DAG.setRoot(NewRoot);
// i indexes lowered arguments. Bump it past the hidden sret argument.
++i;
}
SmallVector<SDValue, 4> Chains;
DenseMap<int, int> ArgCopyElisionFrameIndexMap;
for (const Argument &Arg : F.args()) {
SmallVector<SDValue, 4> ArgValues;
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
continue;
bool ArgHasUses = !Arg.use_empty();
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
InVals[i], ArgHasUses);
}
// If this argument is unused then remember its value. It is used to generate
// debugging information.
bool isSwiftErrorArg =
TLI->supportSwiftError() &&
Arg.hasAttribute(Attribute::SwiftError);
if (!ArgHasUses && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&Arg, InVals[i]);
// Also remember any frame index for use in FastISel.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(InVals[i].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
MVT PartVT = TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(),
F.getCallingConv(), VT);
unsigned NumParts = TLI->getNumRegistersForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
// Even an apparent 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
// function.
if (ArgHasUses || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (Arg.hasAttribute(Attribute::SExt))
AssertOp = ISD::AssertSext;
else if (Arg.hasAttribute(Attribute::ZExt))
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
PartVT, VT, nullptr,
F.getCallingConv(), AssertOp));
}
i += NumParts;
}
// We don't need to do anything else for unused arguments.
if (ArgValues.empty())
continue;
// Note down frame index.
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
// We want to associate the argument with the frame index, among
// involved operands, that corresponds to the lowest address. The
// getCopyFromParts function, called earlier, is swapping the order of
// the operands to BUILD_PAIR depending on endianness. The result of
// that swapping is that the least significant bits of the argument will
// be in the first operand of the BUILD_PAIR node, and the most
// significant bits will be in the second operand.
unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Analyses past this point are naive and don't expect an assertion.
if (Res.getOpcode() == ISD::AssertZext)
Res = Res.getOperand(0);
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
if (Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
}
if (!isOnlyUsedInEntryBlock(&Arg, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(&Arg);
SDB->CopyToExportRegsIfNeeded(&Arg);
}
}
if (!Chains.empty()) {
Chains.push_back(NewRoot);
NewRoot = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
DAG.setRoot(NewRoot);
assert(i == InVals.size() && "Argument register count mismatch!");
// If any argument copy elisions occurred and we have debug info, update the
// stale frame indices used in the dbg.declare variable info table.
MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
if (I != ArgCopyElisionFrameIndexMap.end())
VI.Slot = I->second;
}
}
// Finally, if the target has anything special to do, allow it to do so.
emitFunctionEntryCode();
}
/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
/// ensure constants are generated when needed. Remember the virtual registers
/// that need to be added to the Machine PHI nodes as input. We cannot just
/// directly add them, because expansion might result in multiple MBB's for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const Instruction *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
// Check PHI nodes in successors that expect a value to be available from this
// block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
if (!SuccsHandled.insert(SuccMBB).second)
continue;
MachineBasicBlock::iterator MBBI = SuccMBB->begin();
// At this point we know that there is a 1-1 correspondence between LLVM PHI
// nodes and Machine PHI nodes, but the incoming operands have not been
// emitted yet.
for (const PHINode &PN : SuccBB->phis()) {
// Ignore dead phi's.
if (PN.use_empty())
continue;
// Skip empty types
if (PN.getType()->isEmptyTy())
continue;
unsigned Reg;
const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB);
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
RegOut = FuncInfo.CreateRegs(C);
// We need to zero/sign extend ConstantInt phi operands to match
// assumptions in FunctionLoweringInfo::ComputePHILiveOutRegInfo.
ISD::NodeType ExtendType = ISD::ANY_EXTEND;
if (auto *CI = dyn_cast<ConstantInt>(C))
ExtendType = TLI.signExtendConstant(CI) ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND;
CopyValueToVirtualRegister(C, RegOut, ExtendType);
}
Reg = RegOut;
} else {
DenseMap<const Value *, Register>::iterator I =
FuncInfo.ValueMap.find(PHIOp);
if (I != FuncInfo.ValueMap.end())
Reg = I->second;
else {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
// Remember that this register needs to be added to the machine PHI node as
// the input for this MBB.
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs);
for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
EVT VT = ValueVTs[vti];
unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
for (unsigned i = 0, e = NumRegisters; i != e; ++i)
FuncInfo.PHINodesToUpdate.push_back(
std::make_pair(&*MBBI++, Reg + i));
Reg += NumRegisters;
}
}
}
ConstantsOut.clear();
}
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
return nullptr;
return &*I;
}
/// During lowering new call nodes can be created (such as memset, etc.).
/// Those will become new roots of the current DAG, but complications arise
/// when they are tail calls. In such cases, the call lowering will update
/// the root, but the builder still needs to know that a tail call has been
/// lowered in order to avoid generating an additional return.
void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
// If the node is null, we do have a tail call.
if (MaybeTC.getNode() != nullptr)
DAG.setRoot(MaybeTC);
else
HasTailCall = true;
}
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB) {
MachineFunction *CurMF = FuncInfo.MF;
MachineBasicBlock *NextMBB = nullptr;
MachineFunction::iterator BBI(W.MBB);
if (++BBI != FuncInfo.MF->end())
NextMBB = &*BBI;
unsigned Size = W.LastCluster - W.FirstCluster + 1;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (Size == 2 && W.MBB == SwitchMBB) {
// If any two of the cases have the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
// TODO: This could be extended to merge any 2 cases in switches with 3
// cases.
// TODO: Handle cases where W.CaseBB != SwitchBB.
CaseCluster &Small = *W.FirstCluster;
CaseCluster &Big = *W.LastCluster;
if (Small.Low == Small.High && Big.Low == Big.High &&
Small.MBB == Big.MBB) {
const APInt &SmallValue = Small.Low->getValue();
const APInt &BigValue = Big.Low->getValue();
// Check that there is only one bit different.
APInt CommonBit = BigValue ^ SmallValue;
if (CommonBit.isPowerOf2()) {
SDValue CondLHS = getValue(Cond);
EVT VT = CondLHS.getValueType();
SDLoc DL = getCurSDLoc();
SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
DAG.getConstant(CommonBit, DL, VT));
SDValue Cond = DAG.getSetCC(
DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT),
ISD::SETEQ);
// Update successor info.
// Both Small and Big will jump to Small.BB, so we sum up the
// probabilities.
addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
if (BPI)
addSuccessorWithProb(
SwitchMBB, DefaultMBB,
// The default destination is the first successor in IR.
BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
else
addSuccessorWithProb(SwitchMBB, DefaultMBB);
// Insert the true branch.
SDValue BrCond =
DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(Small.MBB));
// Insert the false branch.
BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
DAG.getBasicBlock(DefaultMBB));
DAG.setRoot(BrCond);
return;
}
}
}
if (TM.getOptLevel() != CodeGenOpt::None) {
// Here, we order cases by probability so the most likely case will be
// checked first. However, two clusters can have the same probability in
// which case their relative ordering is non-deterministic. So we use Low
// as a tie-breaker as clusters are guaranteed to never overlap.
llvm::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
return a.Prob != b.Prob ?
a.Prob > b.Prob :
a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
// without changing the order of probabilities.
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
--I;
if (I->Prob > W.LastCluster->Prob)
break;
if (I->Kind == CC_Range && I->MBB == NextMBB) {
std::swap(*I, *W.LastCluster);
break;
}
}
}
// Compute total probability.
BranchProbability DefaultProb = W.DefaultProb;
BranchProbability UnhandledProbs = DefaultProb;
for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
UnhandledProbs += I->Prob;
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
bool FallthroughUnreachable = false;
MachineBasicBlock *Fallthrough;
if (I == W.LastCluster) {
// For the last cluster, fall through to the default destination.
Fallthrough = DefaultMBB;
FallthroughUnreachable = isa<UnreachableInst>(
DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
} else {
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
UnhandledProbs -= I->Prob;
switch (I->Kind) {
case CC_JumpTable: {
// FIXME: Optimize away range check based on pivot comparisons.
JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
CurMF->insert(BBI, JumpMBB);
auto JumpProb = I->Prob;
auto FallthroughProb = UnhandledProbs;
// If the default statement is a target of the jump table, we evenly
// distribute the default probability to successors of CurMBB. Also
// update the probability on the edge from JumpMBB to Fallthrough.
for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
SE = JumpMBB->succ_end();
SI != SE; ++SI) {
if (*SI == DefaultMBB) {
JumpProb += DefaultProb / 2;
FallthroughProb -= DefaultProb / 2;
JumpMBB->setSuccProbability(SI, DefaultProb / 2);
JumpMBB->normalizeSuccProbs();
break;
}
}
if (FallthroughUnreachable)
JTH->FallthroughUnreachable = true;
if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
// The jump table header will be inserted in our current block, do the
// range check, and fall through to our fallthrough block.
JTH->HeaderBB = CurMBB;
JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
// If we're in the right place, emit the jump table header right now.
if (CurMBB == SwitchMBB) {
visitJumpTableHeader(*JT, *JTH, SwitchMBB);
JTH->Emitted = true;
}
break;
}
case CC_BitTests: {
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
// The bit test blocks haven't been inserted yet; insert them here.
for (BitTestCase &BTC : BTB->Cases)
CurMF->insert(BBI, BTC.ThisBB);
// Fill in fields of the BitTestBlock.
BTB->Parent = CurMBB;
BTB->Default = Fallthrough;
BTB->DefaultProb = UnhandledProbs;
// If the cases in the bit test don't form a contiguous range, we evenly
// distribute the probability on the edge to Fallthrough to two
// successors of CurMBB.
if (!BTB->ContiguousRange) {
BTB->Prob += DefaultProb / 2;
BTB->DefaultProb -= DefaultProb / 2;
}
if (FallthroughUnreachable)
BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
BTB->Emitted = true;
}
break;
}
case CC_Range: {
const Value *RHS, *LHS, *MHS;
ISD::CondCode CC;
if (I->Low == I->High) {
// Check Cond == I->Low.
CC = ISD::SETEQ;
LHS = Cond;
RHS = I->Low;
MHS = nullptr;
} else {
// Check I->Low <= Cond <= I->High.
CC = ISD::SETLE;
LHS = I->Low;
MHS = Cond;
RHS = I->High;
}
// If Fallthrough is unreachable, fold away the comparison.
if (FallthroughUnreachable)
CC = ISD::SETTRUE;
// The false probability is the sum of all unhandled cases.
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
getCurSDLoc(), I->Prob, UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
break;
}
}
CurMBB = Fallthrough;
}
}
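// Return how many clusters in [First, Last] would be ordered before CC, i.e.
// CC's rank when the range is sorted by decreasing probability with the low
// case value as the tie-breaker (the same ordering used in lowerWorkItem).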
unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC,
CaseClusterIt First,
CaseClusterIt Last) {
return std::count_if(First, Last + 1, [&](const CaseCluster &X) {
if (X.Prob != CC.Prob)
return X.Prob > CC.Prob;
// Ties are broken by comparing the case value.
return X.Low->getValue().slt(CC.Low->getValue());
});
}
void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
const SwitchWorkListItem &W,
Value *Cond,
MachineBasicBlock *SwitchMBB) {
assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) &&
"Clusters not sorted?");
assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!");
// Balance the tree based on branch probabilities to create a near-optimal (in
// terms of search time given key frequency) binary search tree. See e.g. Kurt
// Mehlhorn "Nearly Optimal Binary Search Trees" (1975).
CaseClusterIt LastLeft = W.FirstCluster;
CaseClusterIt FirstRight = W.LastCluster;
auto LeftProb = LastLeft->Prob + W.DefaultProb / 2;
auto RightProb = FirstRight->Prob + W.DefaultProb / 2;
// Move LastLeft and FirstRight towards each other from opposite directions to
// find a partitioning of the clusters which balances the probability on both
// sides. If LeftProb and RightProb are equal, alternate which side is
// taken to ensure 0-probability nodes are distributed evenly.
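// A small worked example: given cluster probabilities {0.1, 0.4, 0.1, 0.4},
// the loop settles on a {0.1, 0.4} | {0.1, 0.4} partition, with 0.5 (plus the
// shared DefaultProb/2 term) of probability on each side.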
unsigned I = 0;
while (LastLeft + 1 < FirstRight) {
if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1)))
LeftProb += (++LastLeft)->Prob;
else
RightProb += (--FirstRight)->Prob;
I++;
}
while (true) {
// Our binary search tree differs from a typical BST in that ours can have up
// to three values in each leaf. The pivot selection above doesn't take that
// into account, which means the tree might require more nodes and be less
// efficient. We compensate for this here.
unsigned NumLeft = LastLeft - W.FirstCluster + 1;
unsigned NumRight = W.LastCluster - FirstRight + 1;
if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) {
// If one side has less than 3 clusters, and the other has more than 3,
// consider taking a cluster from the other side.
if (NumLeft < NumRight) {
// Consider moving the first cluster on the right to the left side.
CaseCluster &CC = *FirstRight;
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
if (LeftSideRank <= RightSideRank) {
// Moving the cluster to the left does not demote it.
++LastLeft;
++FirstRight;
continue;
}
} else {
assert(NumRight < NumLeft);
// Consider moving the last element on the left to the right side.
CaseCluster &CC = *LastLeft;
unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
if (RightSideRank <= LeftSideRank) {
// Moving the cluster to the right does not demote it.
--LastLeft;
--FirstRight;
continue;
}
}
}
break;
}
assert(LastLeft + 1 == FirstRight);
assert(LastLeft >= W.FirstCluster);
assert(FirstRight <= W.LastCluster);
// Use the first element on the right as pivot since we will make less-than
// comparisons against it.
CaseClusterIt PivotCluster = FirstRight;
assert(PivotCluster > W.FirstCluster);
assert(PivotCluster <= W.LastCluster);
CaseClusterIt FirstLeft = W.FirstCluster;
CaseClusterIt LastRight = W.LastCluster;
const ConstantInt *Pivot = PivotCluster->Low;
// New blocks will be inserted immediately after the current one.
MachineFunction::iterator BBI(W.MBB);
++BBI;
// We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
// we can branch to its destination directly if it's squeezed exactly in
// between the known lower bound and Pivot - 1.
MachineBasicBlock *LeftMBB;
if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
FirstLeft->Low == W.GE &&
(FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
LeftMBB = FirstLeft->MBB;
} else {
LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, LeftMBB);
WorkList.push_back(
{LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
// single cluster, RHS.Low == Pivot, and we can branch to its destination
// directly if RHS.High equals the current upper bound.
MachineBasicBlock *RightMBB;
if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
RightMBB = FirstRight->MBB;
} else {
RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
FuncInfo.MF->insert(BBI, RightMBB);
WorkList.push_back(
{RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
// Put Cond in a virtual register to make it available from the new blocks.
ExportFromCurrentBlock(Cond);
}
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
getCurSDLoc(), LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
SL->SwitchCases.push_back(CB);
}
// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
// from the switch statement.
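// For example, after peeling a case of probability 1/2, a remaining case of
// probability 1/4 is rescaled to (1/4) / (1/2) = 1/2 of the residual switch.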
static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
BranchProbability PeeledCaseProb) {
if (PeeledCaseProb == BranchProbability::getOne())
return BranchProbability::getZero();
BranchProbability SwitchProb = PeeledCaseProb.getCompl();
uint32_t Numerator = CaseProb.getNumerator();
uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
return BranchProbability(Numerator, std::max(Numerator, Denominator));
}
// Try to peel the top probability case if it exceeds the threshold.
// Return current MachineBasicBlock for the switch statement if the peeling
// does not occur.
// If the peeling is performed, return the newly created MachineBasicBlock
// for the peeled switch statement. Also update Clusters to remove the peeled
// case. PeeledCaseProb is the BranchProbability for the peeled case.
MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
const SwitchInst &SI, CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Don't perform if there is only one cluster or optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
TM.getOptLevel() == CodeGenOpt::None ||
SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
unsigned PeeledCaseIndex = 0;
bool SwitchPeeled = false;
for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
CaseCluster &CC = Clusters[Index];
if (CC.Prob < TopCaseProb)
continue;
TopCaseProb = CC.Prob;
PeeledCaseIndex = Index;
SwitchPeeled = true;
}
if (!SwitchPeeled)
return SwitchMBB;
LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: "
<< TopCaseProb << "\n");
// Record the MBB for the peeled switch statement.
MachineFunction::iterator BBI(SwitchMBB);
++BBI;
MachineBasicBlock *PeeledSwitchMBB =
FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
ExportFromCurrentBlock(SI.getCondition());
auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
nullptr, nullptr, TopCaseProb.getCompl()};
lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
Clusters.erase(PeeledCaseIt);
for (CaseCluster &CC : Clusters) {
LLVM_DEBUG(
dbgs() << "Scale the probablity for one cluster, before scaling: "
<< CC.Prob << "\n");
CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
}
PeeledCaseProb = TopCaseProb;
return PeeledSwitchMBB;
}
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
CaseClusterVector Clusters;
Clusters.reserve(SI.getNumCases());
for (auto I : SI.cases()) {
MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()];
const ConstantInt *CaseVal = I.getCaseValue();
BranchProbability Prob =
BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
: BranchProbability(1, SI.getNumCases() + 1);
Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
}
MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()];
// Cluster adjacent cases with the same destination. We do this at all
// optimization levels because it's cheap to do and will make codegen faster
// if there are many clusters.
sortAndRangeify(Clusters);
// The branch probability of the peeled case.
BranchProbability PeeledCaseProb = BranchProbability::getZero();
MachineBasicBlock *PeeledSwitchMBB =
peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
getControlRoot(), DAG.getBasicBlock(DefaultMBB)));
}
return;
}
SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI());
SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
for (const CaseCluster &C : Clusters) {
if (C.Kind == CC_JumpTable)
dbgs() << "JT:";
if (C.Kind == CC_BitTests)
dbgs() << "BT:";
C.Low->getValue().print(dbgs(), true);
if (C.Low != C.High) {
dbgs() << '-';
C.High->getValue().print(dbgs(), true);
}
dbgs() << ' ';
}
dbgs() << '\n';
});
assert(!Clusters.empty());
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
// Scale the branch probability for DefaultMBB if peeling occurs and
// DefaultMBB is not replaced.
if (PeeledCaseProb != BranchProbability::getZero() &&
DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
WorkList.push_back(
{PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.pop_back_val();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
!DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
}
lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB);
}
}
void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto DL = getCurSDLoc();
EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getStepVector(DL, ResultVT));
}
void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V = getValue(I.getOperand(0));
assert(VT == V.getValueType() && "Malformed vector.reverse!");
if (VT.isScalableVector()) {
setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
return;
}
// Use VECTOR_SHUFFLE for the fixed-length vector
// to maintain existing behavior.
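// For example, reversing a <4 x i32> vector uses the shuffle mask <3, 2, 1, 0>.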
SmallVector<int, 8> Mask;
unsigned NumElts = VT.getVectorMinNumElements();
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(NumElts - 1 - i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
ValueVTs);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0) return;
SmallVector<SDValue, 4> Values(NumValues);
SDValue Op = getValue(I.getOperand(0));
for (unsigned i = 0; i != NumValues; ++i)
Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i],
SDValue(Op.getNode(), Op.getResNo() + i));
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V1 = getValue(I.getOperand(0));
SDValue V2 = getValue(I.getOperand(1));
int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
// VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
if (VT.isScalableVector()) {
MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
DAG.getConstant(Imm, DL, IdxVT)));
return;
}
unsigned NumElts = VT.getVectorNumElements();
uint64_t Idx = (NumElts + Imm) % NumElts;
// Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
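// For example, splicing two <4 x i32> vectors with Imm = 1 gives Idx = 1 and
// the mask <1, 2, 3, 4>: the last three elements of V1 followed by the first
// element of V2.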
SmallVector<int, 8> Mask;
for (unsigned i = 0; i < NumElts; ++i)
Mask.push_back(Idx + i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 8dc8d381ad16..a63118067139 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -1,954 +1,960 @@
//===----- TypePromotion.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This is an opcode based type promotion pass for small types that would
/// otherwise be promoted during legalisation. This works around the limitations
/// of selection dag for cyclic regions. The search begins from the operands
/// of icmp instructions, where a tree consisting of non-wrapping or
/// safe-wrapping instructions is built, checked and promoted if possible.
///
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "type-promotion"
#define PASS_NAME "Type Promotion"
using namespace llvm;
static cl::opt<bool> DisablePromotion("disable-type-promotion", cl::Hidden,
cl::init(false),
cl::desc("Disable type promotion pass"));
// The goal of this pass is to enable more efficient code generation for
// operations on narrow types (i.e. types with < 32-bits) and this is a
// motivating IR code example:
//
// define hidden i32 @cmp(i8 zeroext) {
// %2 = add i8 %0, -49
// %3 = icmp ult i8 %2, 3
// ..
// }
//
// The issue here is that i8 is type-legalized to i32 because i8 is not a
// legal type. Thus, arithmetic is done in integer-precision, but then the
// byte value is masked out as follows:
//
// t19: i32 = add t4, Constant:i32<-49>
// t24: i32 = and t19, Constant:i32<255>
//
// Consequently, we generate code like this:
//
// subs r0, #49
// uxtb r1, r0
// cmp r1, #3
//
// This shows that masking out the byte value results in generation of
// the UXTB instruction. This is not optimal as r0 already contains the byte
// value we need, and so instead we can just generate:
//
// sub.w r1, r0, #49
// cmp r1, #3
//
// We achieve this by type promoting the IR to i32 like so for this example:
//
// define i32 @cmp(i8 zeroext %c) {
// %0 = zext i8 %c to i32
// %c.off = add i32 %0, -49
// %1 = icmp ult i32 %c.off, 3
// ..
// }
//
// For this to be valid and legal, we need to prove that the i32 add is
// producing the same value as the i8 addition, and that e.g. no overflow
// happens.
//
// A brief sketch of the algorithm and some terminology.
// We pattern match interesting IR patterns:
// - which have "sources": instructions producing narrow values (i8, i16), and
// - they have "sinks": instructions consuming these narrow values.
//
// We collect all instructions connecting sources and sinks in a worklist, so
// that we can mutate these instructions and perform type promotion when it is
// legal to do so.
namespace {
class IRPromoter {
LLVMContext &Ctx;
unsigned PromotedWidth = 0;
SetVector<Value *> &Visited;
SetVector<Value *> &Sources;
SetVector<Instruction *> &Sinks;
SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
SmallPtrSet<Value *, 8> NewInsts;
SmallPtrSet<Instruction *, 4> InstsToRemove;
DenseMap<Value *, SmallVector<Type *, 4>> TruncTysMap;
SmallPtrSet<Value *, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
void ExtendSources();
void ConvertTruncs();
void PromoteTree();
void TruncateSinks();
void Cleanup();
public:
IRPromoter(LLVMContext &C, unsigned Width,
SetVector<Value *> &visited, SetVector<Value *> &sources,
SetVector<Instruction *> &sinks,
SmallPtrSetImpl<Instruction *> &wrap)
: Ctx(C), PromotedWidth(Width), Visited(visited),
Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
}
void Mutate();
};
class TypePromotion : public FunctionPass {
unsigned TypeSize = 0;
LLVMContext *Ctx = nullptr;
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value *, 16> AllVisited;
SmallPtrSet<Instruction *, 8> SafeToPromote;
SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have the same size result type as TypeSize.
bool EqualTypeSize(Value *V);
// Does V have the same size, or narrower, result type as TypeSize.
bool LessOrEqualTypeSize(Value *V);
// Does V have a result type that is wider than TypeSize.
bool GreaterThanTypeSize(Value *V);
// Does V have a result type that is narrower than TypeSize.
bool LessThanTypeSize(Value *V);
// Should V be a leaf in the promote tree?
bool isSource(Value *V);
// Should V be a root in the promotion tree?
bool isSink(Value *V);
// Should we change the result type of V? It will result in the users of V
// being visited.
bool shouldPromote(Value *V);
// Is I an add or a sub, which isn't marked as nuw, but where a wrapping
// result won't affect the computation?
bool isSafeWrap(Instruction *I);
// Can V have its integer type promoted, or can the type be ignored.
bool isSupportedType(Value *V);
// Is V an instruction with a supported opcode or another value that we can
// handle, such as constants and basic blocks.
bool isSupportedValue(Value *V);
// Is V an instruction whose result can be trivially promoted, or that has
// safe wrapping.
bool isLegalToPromote(Value *V);
bool TryToPromote(Value *V, unsigned PromotedWidth);
public:
static char ID;
TypePromotion() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
AU.setPreservesCFG();
}
StringRef getPassName() const override { return PASS_NAME; }
bool runOnFunction(Function &F) override;
};
} // namespace
static bool GenerateSignBits(Instruction *I) {
unsigned Opc = I->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
bool TypePromotion::EqualTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() == TypeSize;
}
bool TypePromotion::LessOrEqualTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() <= TypeSize;
}
bool TypePromotion::GreaterThanTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() > TypeSize;
}
bool TypePromotion::LessThanTypeSize(Value *V) {
return V->getType()->getScalarSizeInBits() < TypeSize;
}
/// Return true if the given value is a source in the use-def chain, producing
/// a narrow 'TypeSize' value. These values will be zext to start the promotion
/// of the tree to i32. We guarantee that these won't populate the upper bits
/// of the register. ZExt on the loads will be free, and the same for call
/// return values because we only accept ones that guarantee a zeroext ret val.
/// Many arguments will have the zeroext attribute too, so those would be free
/// too.
bool TypePromotion::isSource(Value *V) {
if (!isa<IntegerType>(V->getType()))
return false;
// TODO Allow zext to be sources.
if (isa<Argument>(V))
return true;
else if (isa<LoadInst>(V))
return true;
else if (isa<BitCastInst>(V))
return true;
else if (auto *Call = dyn_cast<CallInst>(V))
return Call->hasRetAttr(Attribute::AttrKind::ZExt);
else if (auto *Trunc = dyn_cast<TruncInst>(V))
return EqualTypeSize(Trunc);
return false;
}
/// Return true if V will require any promoted values to be truncated for
/// the IR to remain valid. We can't mutate the value type of these
/// instructions.
bool TypePromotion::isSink(Value *V) {
// TODO The truncate also isn't actually necessary because we would have
// already proved that the data value is kept within the range of the original
// data type. We currently remove any truncs inserted for handling zext sinks.
// Sinks are:
// - points where the value in the register is being observed, such as an
// icmp, switch or store.
// - points where value types have to match, such as calls and returns.
// - zext are included to ease the transformation and are generally removed
// later on.
if (auto *Store = dyn_cast<StoreInst>(V))
return LessOrEqualTypeSize(Store->getValueOperand());
if (auto *Return = dyn_cast<ReturnInst>(V))
return LessOrEqualTypeSize(Return->getReturnValue());
if (auto *ZExt = dyn_cast<ZExtInst>(V))
return GreaterThanTypeSize(ZExt);
if (auto *Switch = dyn_cast<SwitchInst>(V))
return LessThanTypeSize(Switch->getCondition());
if (auto *ICmp = dyn_cast<ICmpInst>(V))
return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0));
return isa<CallInst>(V);
}
/// Return whether this instruction can safely wrap.
bool TypePromotion::isSafeWrap(Instruction *I) {
// We can support a potentially wrapping instruction (I) if:
// - It is only used by an unsigned icmp.
// - The icmp uses a constant.
// - The wrapping value (I) is decreasing, i.e. would underflow - wrapping
// around zero to become a larger number than before.
// - The wrapping instruction (I) also uses a constant.
//
// We can then use the two constants to calculate whether the result would
// wrap with respect to itself in the original bitwidth. If it doesn't wrap
// but just underflows the range, the icmp would give the same result whether the
// result has been truncated or not. We calculate this by:
// - Zero extending both constants, if needed, to RegisterBitWidth.
// - Take the absolute value of I's constant, adding this to the icmp const.
// - Check that this value is not out of range for the small type. If it is, it
// means that it has underflowed enough to wrap around the icmp constant.
//
// For example:
//
// %sub = sub i8 %a, 2
// %cmp = icmp ule i8 %sub, 254
//
// If %a = 0, %sub = -2 == FE == 254
// But if this is evaluated as an i32
// %sub = -2 == FF FF FF FE == 4294967294
// So the unsigned compares (i8 and i32) would not yield the same result.
//
// Another way to look at it is:
// %a - 2 <= 254
// %a + 2 <= 254 + 2
// %a <= 256
// And we can't represent 256 in the i8 format, so we don't support it.
//
// Whereas:
//
// %sub = sub i8 %a, 1
// %cmp = icmp ule i8 %sub, 254
//
// If %a = 0, %sub = -1 == FF == 255
// As i32:
// %sub = -1 == FF FF FF FF == 4294967295
//
// In this case, the unsigned compare results would be the same and this
// would also be true for ult, uge and ugt:
// - (255 < 254) == (0xFFFFFFFF < 254) == false
// - (255 <= 254) == (0xFFFFFFFF <= 254) == false
// - (255 > 254) == (0xFFFFFFFF > 254) == true
// - (255 >= 254) == (0xFFFFFFFF >= 254) == true
//
// To demonstrate why we can't handle increasing values:
//
// %add = add i8 %a, 2
// %cmp = icmp ult i8 %add, 127
//
// If %a = 254, %add = 256 == (i8 1)
// As i32:
// %add = 256
//
// (1 < 127) != (256 < 127)
unsigned Opc = I->getOpcode();
if (Opc != Instruction::Add && Opc != Instruction::Sub)
return false;
if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
!isa<ConstantInt>(I->getOperand(1)))
return false;
// Don't support an icmp that deals with sign bits.
auto *CI = cast<ICmpInst>(*I->user_begin());
if (CI->isSigned() || CI->isEquality())
return false;
ConstantInt *ICmpConstant = nullptr;
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
ICmpConstant = Const;
else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
ICmpConstant = Const;
else
return false;
const APInt &ICmpConst = ICmpConstant->getValue();
APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
if (Opc == Instruction::Sub)
OverflowConst = -OverflowConst;
if (!OverflowConst.isNonPositive())
return false;
// Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
// zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
// zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
if (OverflowConst.sgt(ICmpConst)) {
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
<< "const of " << *I << "\n");
SafeWrap.insert(I);
return true;
} else {
LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
<< "const of " << *I << " and " << *CI << "\n");
SafeWrap.insert(I);
SafeWrap.insert(CI);
return true;
}
return false;
}
bool TypePromotion::shouldPromote(Value *V) {
if (!isa<IntegerType>(V->getType()) || isSink(V))
return false;
if (isSource(V))
return true;
auto *I = dyn_cast<Instruction>(V);
if (!I)
return false;
if (isa<ICmpInst>(I))
return false;
return true;
}
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
static bool isPromotedResultSafe(Instruction *I) {
if (GenerateSignBits(I))
return false;
if (!isa<OverflowingBinaryOperator>(I))
return true;
return I->hasNoUnsignedWrap();
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
SmallVector<Instruction *, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
bool ReplacedAll = true;
LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
<< "\n");
for (Use &U : From->uses()) {
auto *User = cast<Instruction>(U.getUser());
if (InstTo && User->isIdenticalTo(InstTo)) {
ReplacedAll = false;
continue;
}
Users.push_back(User);
}
for (auto *U : Users)
U->replaceUsesOfWith(From, To);
if (ReplacedAll)
if (auto *I = dyn_cast<Instruction>(From))
InstsToRemove.insert(I);
}
void IRPromoter::ExtendSources() {
IRBuilder<> Builder{Ctx};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
assert(V->getType() != ExtTy && "zext already extends to i32");
LLVM_DEBUG(dbgs() << "IR Promotion: Inserting ZExt for " << *V << "\n");
Builder.SetInsertPoint(InsertPt);
if (auto *I = dyn_cast<Instruction>(V))
Builder.SetCurrentDebugLocation(I->getDebugLoc());
Value *ZExt = Builder.CreateZExt(V, ExtTy);
if (auto *I = dyn_cast<Instruction>(ZExt)) {
if (isa<Argument>(V))
I->moveBefore(InsertPt);
else
I->moveAfter(InsertPt);
NewInsts.insert(I);
}
ReplaceAllUsersOfWith(V, ZExt);
};
// Now, insert extending instructions between the sources and their users.
LLVM_DEBUG(dbgs() << "IR Promotion: Promoting sources:\n");
for (auto *V : Sources) {
LLVM_DEBUG(dbgs() << " - " << *V << "\n");
if (auto *I = dyn_cast<Instruction>(V))
InsertZExt(I, I);
else if (auto *Arg = dyn_cast<Argument>(V)) {
BasicBlock &BB = Arg->getParent()->front();
InsertZExt(Arg, &*BB.getFirstInsertionPt());
} else {
llvm_unreachable("unhandled source that needs extending");
}
Promoted.insert(V);
}
}
void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
// Mutate the types of the instructions within the tree. Here we handle
// constant operands.
for (auto *V : Visited) {
if (Sources.count(V))
continue;
auto *I = cast<Instruction>(V);
if (Sinks.count(I))
continue;
for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
Value *Op = I->getOperand(i);
if ((Op->getType() == ExtTy) || !isa<IntegerType>(Op->getType()))
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
// For subtract, we don't need to sext the constant. We only put it in
// SafeWrap because SafeWrap.size() is used elsewhere.
// For cmp, we need to sign extend a constant appearing in either
// operand. For add, we should only sign extend the RHS.
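// E.g. for the safe-wrap pattern "%sub = sub i8 %a, 2; %cmp = icmp ule i8
// %sub, 254", the icmp constant 254 (0xFE) sign extends to i32 -2
// (0xFFFFFFFE), while the sub's constant operand is simply zero extended.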
Constant *NewConst = (SafeWrap.contains(I) &&
(I->getOpcode() == Instruction::ICmp || i == 1) &&
I->getOpcode() != Instruction::Sub)
? ConstantExpr::getSExt(Const, ExtTy)
: ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
I->setOperand(i, ConstantInt::get(ExtTy, 0));
}
// Mutate the result type, unless this is an icmp or switch.
if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
I->mutateType(ExtTy);
Promoted.insert(I);
}
}
}
void IRPromoter::TruncateSinks() {
LLVM_DEBUG(dbgs() << "IR Promotion: Fixing up the sinks:\n");
IRBuilder<> Builder{Ctx};
auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction * {
if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType()))
return nullptr;
if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources.count(V))
return nullptr;
LLVM_DEBUG(dbgs() << "IR Promotion: Creating " << *TruncTy << " Trunc for "
<< *V << "\n");
Builder.SetInsertPoint(cast<Instruction>(V));
auto *Trunc = dyn_cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
if (Trunc)
NewInsts.insert(Trunc);
return Trunc;
};
// Fix up any stores or returns that use the results of the promoted
// chain.
for (auto *I : Sinks) {
LLVM_DEBUG(dbgs() << "IR Promotion: For Sink: " << *I << "\n");
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
for (unsigned i = 0; i < Call->arg_size(); ++i) {
Value *Arg = Call->getArgOperand(i);
Type *Ty = TruncTysMap[Call][i];
if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
Trunc->moveBefore(Call);
Call->setArgOperand(i, Trunc);
}
}
continue;
}
// Special case switches because we need to truncate the condition.
if (auto *Switch = dyn_cast<SwitchInst>(I)) {
Type *Ty = TruncTysMap[Switch][0];
if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) {
Trunc->moveBefore(Switch);
Switch->setCondition(Trunc);
}
continue;
}
// Don't insert a trunc for a zext which can still legally promote.
if (auto ZExt = dyn_cast<ZExtInst>(I))
if (ZExt->getType()->getScalarSizeInBits() > PromotedWidth)
continue;
// Now handle the others.
for (unsigned i = 0; i < I->getNumOperands(); ++i) {
Type *Ty = TruncTysMap[I][i];
if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) {
Trunc->moveBefore(I);
I->setOperand(i, Trunc);
}
}
}
}
void IRPromoter::Cleanup() {
LLVM_DEBUG(dbgs() << "IR Promotion: Cleanup..\n");
// Some zexts will now have become redundant, along with their trunc
- // operands, so remove them
+ // operands, so remove them.
+ // Some zexts need to be replaced with truncate if src bitwidth is larger.
for (auto *V : Visited) {
if (!isa<ZExtInst>(V))
continue;
auto ZExt = cast<ZExtInst>(V);
if (ZExt->getDestTy() != ExtTy)
continue;
Value *Src = ZExt->getOperand(0);
if (ZExt->getSrcTy() == ZExt->getDestTy()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing unnecessary cast: " << *ZExt
<< "\n");
ReplaceAllUsersOfWith(ZExt, Src);
continue;
+ } else if (ZExt->getSrcTy()->getScalarSizeInBits() > PromotedWidth) {
+ IRBuilder<> Builder{ZExt};
+ Value *Trunc = Builder.CreateTrunc(Src, ZExt->getDestTy());
+ ReplaceAllUsersOfWith(ZExt, Trunc);
+ continue;
}
// We've inserted a trunc for a zext sink, but we already know that the
// input is in range, negating the need for the trunc.
if (NewInsts.count(Src) && isa<TruncInst>(Src)) {
auto *Trunc = cast<TruncInst>(Src);
assert(Trunc->getOperand(0)->getType() == ExtTy &&
"expected inserted trunc to be operating on i32");
ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0));
}
}
for (auto *I : InstsToRemove) {
LLVM_DEBUG(dbgs() << "IR Promotion: Removing " << *I << "\n");
I->dropAllReferences();
I->eraseFromParent();
}
}
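// Convert the remaining (non-source) truncs into AND masks so the value stays
// in the promoted register width. A sketch of the rewrite, assuming the
// promoted width is i32 and the original trunc destination type was i8:
//
//   %t = trunc i32 %x to i8  -->  %m = and i32 %x, 255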
void IRPromoter::ConvertTruncs() {
LLVM_DEBUG(dbgs() << "IR Promotion: Converting truncs..\n");
IRBuilder<> Builder{Ctx};
for (auto *V : Visited) {
if (!isa<TruncInst>(V) || Sources.count(V))
continue;
auto *Trunc = cast<TruncInst>(V);
Builder.SetInsertPoint(Trunc);
IntegerType *SrcTy = cast<IntegerType>(Trunc->getOperand(0)->getType());
IntegerType *DestTy = cast<IntegerType>(TruncTysMap[Trunc][0]);
unsigned NumBits = DestTy->getScalarSizeInBits();
ConstantInt *Mask =
ConstantInt::get(SrcTy, APInt::getMaxValue(NumBits).getZExtValue());
Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask);
if (auto *I = dyn_cast<Instruction>(Masked))
NewInsts.insert(I);
ReplaceAllUsersOfWith(Trunc, Masked);
}
}
void IRPromoter::Mutate() {
LLVM_DEBUG(dbgs() << "IR Promotion: Promoting use-def chains to "
<< PromotedWidth << "-bits\n");
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
if (auto *Call = dyn_cast<CallInst>(I)) {
for (Value *Arg : Call->args())
TruncTysMap[Call].push_back(Arg->getType());
} else if (auto *Switch = dyn_cast<SwitchInst>(I))
TruncTysMap[I].push_back(Switch->getCondition()->getType());
else {
for (unsigned i = 0; i < I->getNumOperands(); ++i)
TruncTysMap[I].push_back(I->getOperand(i)->getType());
}
}
for (auto *V : Visited) {
if (!isa<TruncInst>(V) || Sources.count(V))
continue;
auto *Trunc = cast<TruncInst>(V);
TruncTysMap[Trunc].push_back(Trunc->getDestTy());
}
// Insert zext instructions between sources and their users.
ExtendSources();
// Promote visited instructions, mutating their types in place.
PromoteTree();
// Convert any truncs, that aren't sources, into AND masks.
ConvertTruncs();
// Insert trunc instructions for use by calls, stores etc...
TruncateSinks();
// Finally, remove unnecessary zexts and truncs, delete old instructions and
// clear the data structures.
Cleanup();
LLVM_DEBUG(dbgs() << "IR Promotion: Mutation complete\n");
}
/// We disallow booleans to make life easier when dealing with icmps but allow
/// any other integer that fits in a scalar register. Void types are accepted
/// so we can handle switches.
bool TypePromotion::isSupportedType(Value *V) {
Type *Ty = V->getType();
// Allow voids and pointers, these won't be promoted.
if (Ty->isVoidTy() || Ty->isPointerTy())
return true;
if (!isa<IntegerType>(Ty) || cast<IntegerType>(Ty)->getBitWidth() == 1 ||
cast<IntegerType>(Ty)->getBitWidth() > RegisterBitWidth)
return false;
return LessOrEqualTypeSize(V);
}
/// We accept most instructions, as well as Arguments and ConstantInts. We
/// disallow casts other than zext and truncs and only allow calls if their
/// return value is zeroext. We don't allow opcodes that can introduce sign
/// bits.
bool TypePromotion::isSupportedValue(Value *V) {
if (auto *I = dyn_cast<Instruction>(V)) {
switch (I->getOpcode()) {
default:
return isa<BinaryOperator>(I) && isSupportedType(I) &&
!GenerateSignBits(I);
case Instruction::GetElementPtr:
case Instruction::Store:
case Instruction::Br:
case Instruction::Switch:
return true;
case Instruction::PHI:
case Instruction::Select:
case Instruction::Ret:
case Instruction::Load:
case Instruction::Trunc:
case Instruction::BitCast:
return isSupportedType(I);
case Instruction::ZExt:
return isSupportedType(I->getOperand(0));
case Instruction::ICmp:
// Now that we allow smaller types than TypeSize, only allow icmps of
// TypeSize, because smaller ones would require a trunc to be legalised.
// TODO: Allow icmp of smaller types, and calculate at the end
// whether the transform would be beneficial.
if (isa<PointerType>(I->getOperand(0)->getType()))
return true;
return EqualTypeSize(I->getOperand(0));
case Instruction::Call: {
// Special cases for calls as we need to check for zeroext
// TODO We should accept calls even if they don't have zeroext, as they
// can still be sinks.
auto *Call = cast<CallInst>(I);
return isSupportedType(Call) &&
Call->hasRetAttr(Attribute::AttrKind::ZExt);
}
}
} else if (isa<Constant>(V) && !isa<ConstantExpr>(V)) {
return isSupportedType(V);
} else if (isa<Argument>(V))
return isSupportedType(V);
return isa<BasicBlock>(V);
}
/// Check that the type of V would be promoted and that the original type is
/// smaller than the targeted promoted type. Check that we're not trying to
/// promote something larger than our base 'TypeSize' type.
bool TypePromotion::isLegalToPromote(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
return true;
if (SafeToPromote.count(I))
return true;
if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
return false;
}
bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
Type *OrigTy = V->getType();
TypeSize = OrigTy->getPrimitiveSizeInBits().getFixedSize();
SafeToPromote.clear();
SafeWrap.clear();
if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V))
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
<< TypeSize << " bits to " << PromotedWidth << "\n");
SetVector<Value *> WorkList;
SetVector<Value *> Sources;
SetVector<Instruction *> Sinks;
SetVector<Value *> CurrentVisited;
WorkList.insert(V);
// Return true if V was added to the worklist as a supported instruction,
// if it was already visited, or if we don't need to explore it (e.g.
// pointer values and GEPs), and false otherwise.
auto AddLegalInst = [&](Value *V) {
if (CurrentVisited.count(V))
return true;
// Ignore GEPs because they don't need promoting and the constant indices
// will prevent the transformation.
if (isa<GetElementPtrInst>(V))
return true;
if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) {
LLVM_DEBUG(dbgs() << "IR Promotion: Can't handle: " << *V << "\n");
return false;
}
WorkList.insert(V);
return true;
};
// Iterate through, and add to, a tree of operands and users in the use-def.
while (!WorkList.empty()) {
Value *V = WorkList.pop_back_val();
if (CurrentVisited.count(V))
continue;
// Ignore non-instructions, other than arguments.
if (!isa<Instruction>(V) && !isSource(V))
continue;
// If we've already visited this value from somewhere, bail now because
// the tree has already been explored.
// TODO: This could limit the transform, i.e. if we try to promote something
// from an i8 and fail first, before trying an i16.
if (AllVisited.count(V))
return false;
CurrentVisited.insert(V);
AllVisited.insert(V);
// Calls can be both sources and sinks.
if (isSink(V))
Sinks.insert(cast<Instruction>(V));
if (isSource(V))
Sources.insert(V);
if (!isSink(V) && !isSource(V)) {
if (auto *I = dyn_cast<Instruction>(V)) {
// Visit operands of any instruction visited.
for (auto &U : I->operands()) {
if (!AddLegalInst(U))
return false;
}
}
}
// Don't visit users of a node which isn't going to be mutated unless it's a
// source.
if (isSource(V) || shouldPromote(V)) {
for (Use &U : V->uses()) {
if (!AddLegalInst(U.getUser()))
return false;
}
}
}
LLVM_DEBUG({
dbgs() << "IR Promotion: Visited nodes:\n";
for (auto *I : CurrentVisited)
I->dump();
});
unsigned ToPromote = 0;
unsigned NonFreeArgs = 0;
SmallPtrSet<BasicBlock *, 4> Blocks;
for (auto *V : CurrentVisited) {
if (auto *I = dyn_cast<Instruction>(V))
Blocks.insert(I->getParent());
if (Sources.count(V)) {
if (auto *Arg = dyn_cast<Argument>(V))
if (!Arg->hasZExtAttr() && !Arg->hasSExtAttr())
++NonFreeArgs;
continue;
}
if (Sinks.count(cast<Instruction>(V)))
continue;
++ToPromote;
}
// DAG optimizations should be able to handle these cases better, especially
// for function arguments.
if (ToPromote < 2 || (Blocks.size() == 1 && (NonFreeArgs > SafeWrap.size())))
return false;
IRPromoter Promoter(*Ctx, PromotedWidth, CurrentVisited, Sources, Sinks,
SafeWrap);
Promoter.Mutate();
return true;
}
bool TypePromotion::runOnFunction(Function &F) {
if (skipFunction(F) || DisablePromotion)
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: Running on " << F.getName() << "\n");
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC)
return false;
AllVisited.clear();
SafeToPromote.clear();
SafeWrap.clear();
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
const TargetMachine &TM = TPC->getTM<TargetMachine>();
const TargetSubtargetInfo *SubtargetInfo = TM.getSubtargetImpl(F);
const TargetLowering *TLI = SubtargetInfo->getTargetLowering();
const TargetTransformInfo &TII =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
RegisterBitWidth =
TII.getRegisterBitWidth(TargetTransformInfo::RGK_Scalar).getFixedSize();
Ctx = &F.getParent()->getContext();
// Search up from icmps to try to promote their operands.
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (AllVisited.count(&I))
continue;
if (!isa<ICmpInst>(&I))
continue;
auto *ICmp = cast<ICmpInst>(&I);
// Skip signed or pointer compares
if (ICmp->isSigned() || !isa<IntegerType>(ICmp->getOperand(0)->getType()))
continue;
LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << *ICmp << "\n");
for (auto &Op : ICmp->operands()) {
if (auto *I = dyn_cast<Instruction>(Op)) {
EVT SrcVT = TLI->getValueType(DL, I->getType());
if (SrcVT.isSimple() && TLI->isTypeLegal(SrcVT.getSimpleVT()))
break;
if (TLI->getTypeAction(*Ctx, SrcVT) !=
TargetLowering::TypePromoteInteger)
break;
EVT PromotedVT = TLI->getTypeToTransformTo(*Ctx, SrcVT);
if (RegisterBitWidth < PromotedVT.getFixedSizeInBits()) {
LLVM_DEBUG(dbgs() << "IR Promotion: Couldn't find target register "
<< "for promoted type\n");
break;
}
MadeChange |= TryToPromote(I, PromotedVT.getFixedSizeInBits());
break;
}
}
}
}
AllVisited.clear();
SafeToPromote.clear();
SafeWrap.clear();
return MadeChange;
}
INITIALIZE_PASS_BEGIN(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_END(TypePromotion, DEBUG_TYPE, PASS_NAME, false, false)
char TypePromotion::ID = 0;
FunctionPass *llvm::createTypePromotionPass() { return new TypePromotion(); }
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 75594f90c926..b9962da1d302 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1,4706 +1,4706 @@
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstring>
using namespace llvm;
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradePTESTIntrinsic(Function* F, Intrinsic::ID IID,
Function *&NewFn) {
// Check whether this is an old version of the function, which received
// v4f32 arguments.
Type *Arg0Type = F->getFunctionType()->getParamType(0);
if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
return false;
// Yes, it's old, replace it with new version.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check that the last argument is an i32.
Type *LastArgType = F->getFunctionType()->getParamType(
F->getFunctionType()->getNumParams() - 1);
if (!LastArgType->isIntegerTy(32))
return false;
// Move this function aside and map down.
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
Function *&NewFn) {
// Check if the return type is a vector.
if (F->getReturnType()->isVectorTy())
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
// All of the intrinsic matches below should be marked with the LLVM version
// that started autoupgrading them. At some point in the future we would like
// to use this information to remove upgrade code for some older intrinsics.
// It is currently undecided how we will determine that future point.
if (Name == "addcarryx.u32" || // Added in 8.0
Name == "addcarryx.u64" || // Added in 8.0
Name == "addcarry.u32" || // Added in 8.0
Name == "addcarry.u64" || // Added in 8.0
Name == "subborrow.u32" || // Added in 8.0
Name == "subborrow.u64" || // Added in 8.0
Name.startswith("sse2.padds.") || // Added in 8.0
Name.startswith("sse2.psubs.") || // Added in 8.0
Name.startswith("sse2.paddus.") || // Added in 8.0
Name.startswith("sse2.psubus.") || // Added in 8.0
Name.startswith("avx2.padds.") || // Added in 8.0
Name.startswith("avx2.psubs.") || // Added in 8.0
Name.startswith("avx2.paddus.") || // Added in 8.0
Name.startswith("avx2.psubus.") || // Added in 8.0
Name.startswith("avx512.padds.") || // Added in 8.0
Name.startswith("avx512.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.padds.") || // Added in 8.0
Name.startswith("avx512.mask.psubs.") || // Added in 8.0
Name.startswith("avx512.mask.paddus.") || // Added in 8.0
Name.startswith("avx512.mask.psubus.") || // Added in 8.0
Name == "ssse3.pabs.b.128" || // Added in 6.0
Name == "ssse3.pabs.w.128" || // Added in 6.0
Name == "ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("fma4.vfmadd.s") || // Added in 7.0
Name.startswith("fma.vfmadd.") || // Added in 7.0
Name.startswith("fma.vfmsub.") || // Added in 7.0
Name.startswith("fma.vfmsubadd.") || // Added in 7.0
Name.startswith("fma.vfnmadd.") || // Added in 7.0
Name.startswith("fma.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
Name.startswith("avx512.kunpck") || // Added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
Name == "sse.sqrt.ss" || // Added in 7.0
Name == "sse2.sqrt.sd" || // Added in 7.0
Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
Name.startswith("avx.sqrt.p") || // Added in 7.0
Name.startswith("sse2.sqrt.p") || // Added in 7.0
Name.startswith("sse.sqrt.p") || // Added in 7.0
Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
Name.startswith("sse2.pcmpeq.") || // Added in 3.1
Name.startswith("sse2.pcmpgt.") || // Added in 3.1
Name.startswith("avx2.pcmpeq.") || // Added in 3.1
Name.startswith("avx2.pcmpgt.") || // Added in 3.1
Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
Name.startswith("avx.vperm2f128.") || // Added in 6.0
Name == "avx2.vperm2i128" || // Added in 6.0
Name == "sse.add.ss" || // Added in 4.0
Name == "sse2.add.sd" || // Added in 4.0
Name == "sse.sub.ss" || // Added in 4.0
Name == "sse2.sub.sd" || // Added in 4.0
Name == "sse.mul.ss" || // Added in 4.0
Name == "sse2.mul.sd" || // Added in 4.0
Name == "sse.div.ss" || // Added in 4.0
Name == "sse2.div.sd" || // Added in 4.0
Name == "sse41.pmaxsb" || // Added in 3.9
Name == "sse2.pmaxs.w" || // Added in 3.9
Name == "sse41.pmaxsd" || // Added in 3.9
Name == "sse2.pmaxu.b" || // Added in 3.9
Name == "sse41.pmaxuw" || // Added in 3.9
Name == "sse41.pmaxud" || // Added in 3.9
Name == "sse41.pminsb" || // Added in 3.9
Name == "sse2.pmins.w" || // Added in 3.9
Name == "sse41.pminsd" || // Added in 3.9
Name == "sse2.pminu.b" || // Added in 3.9
Name == "sse41.pminuw" || // Added in 3.9
Name == "sse41.pminud" || // Added in 3.9
Name == "avx512.kand.w" || // Added in 7.0
Name == "avx512.kandn.w" || // Added in 7.0
Name == "avx512.knot.w" || // Added in 7.0
Name == "avx512.kor.w" || // Added in 7.0
Name == "avx512.kxor.w" || // Added in 7.0
Name == "avx512.kxnor.w" || // Added in 7.0
Name == "avx512.kortestc.w" || // Added in 7.0
Name == "avx512.kortestz.w" || // Added in 7.0
Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
Name.startswith("avx2.pmax") || // Added in 3.9
Name.startswith("avx2.pmin") || // Added in 3.9
Name.startswith("avx512.mask.pmax") || // Added in 4.0
Name.startswith("avx512.mask.pmin") || // Added in 4.0
Name.startswith("avx2.vbroadcast") || // Added in 3.8
Name.startswith("avx2.pbroadcast") || // Added in 3.8
Name.startswith("avx.vpermil.") || // Added in 3.1
Name.startswith("sse2.pshuf") || // Added in 3.9
Name.startswith("avx512.pbroadcast") || // Added in 3.9
Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
Name.startswith("avx512.mask.movddup") || // Added in 3.9
Name.startswith("avx512.mask.movshdup") || // Added in 3.9
Name.startswith("avx512.mask.movsldup") || // Added in 3.9
Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
Name.startswith("avx512.mask.punpckl") || // Added in 3.9
Name.startswith("avx512.mask.punpckh") || // Added in 3.9
Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
Name.startswith("avx512.mask.pand.") || // Added in 3.9
Name.startswith("avx512.mask.pandn.") || // Added in 3.9
Name.startswith("avx512.mask.por.") || // Added in 3.9
Name.startswith("avx512.mask.pxor.") || // Added in 3.9
Name.startswith("avx512.mask.and.") || // Added in 3.9
Name.startswith("avx512.mask.andn.") || // Added in 3.9
Name.startswith("avx512.mask.or.") || // Added in 3.9
Name.startswith("avx512.mask.xor.") || // Added in 3.9
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
Name == "avx512.mask.vcvtph2ps.128" || // Added in 11.0
Name == "avx512.mask.vcvtph2ps.256" || // Added in 11.0
Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
Name == "avx512.cvtusi2sd" || // Added in 7.0
Name.startswith("avx512.mask.permvar.") || // Added in 7.0
Name == "sse2.pmulu.dq" || // Added in 7.0
Name == "sse41.pmuldq" || // Added in 7.0
Name == "avx2.pmulu.dq" || // Added in 7.0
Name == "avx2.pmul.dq" || // Added in 7.0
Name == "avx512.pmulu.dq.512" || // Added in 7.0
Name == "avx512.pmul.dq.512" || // Added in 7.0
Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
Name.startswith("avx512.cmp.p") || // Added in 12.0
Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
Name.startswith("avx512.mask.psll.d") || // Added in 4.0
Name.startswith("avx512.mask.psll.q") || // Added in 4.0
Name.startswith("avx512.mask.psll.w") || // Added in 4.0
Name.startswith("avx512.mask.psra.d") || // Added in 4.0
Name.startswith("avx512.mask.psra.q") || // Added in 4.0
Name.startswith("avx512.mask.psra.w") || // Added in 4.0
Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
Name.startswith("avx512.mask.pslli") || // Added in 4.0
Name.startswith("avx512.mask.psrai") || // Added in 4.0
Name.startswith("avx512.mask.psrli") || // Added in 4.0
Name.startswith("avx512.mask.psllv") || // Added in 4.0
Name.startswith("avx512.mask.psrav") || // Added in 4.0
Name.startswith("avx512.mask.psrlv") || // Added in 4.0
Name.startswith("sse41.pmovsx") || // Added in 3.8
Name.startswith("sse41.pmovzx") || // Added in 3.9
Name.startswith("avx2.pmovsx") || // Added in 3.9
Name.startswith("avx2.pmovzx") || // Added in 3.9
Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
Name.startswith("avx512.vpshld.") || // Added in 8.0
Name.startswith("avx512.vpshrd.") || // Added in 8.0
Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
Name.startswith("avx512.mask.conflict.") || // Added in 9.0
Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
Name == "sse.cvtsi2ss" || // Added in 7.0
Name == "sse.cvtsi642ss" || // Added in 7.0
Name == "sse2.cvtsi2sd" || // Added in 7.0
Name == "sse2.cvtsi642sd" || // Added in 7.0
Name == "sse2.cvtss2sd" || // Added in 7.0
Name == "sse2.cvtdq2pd" || // Added in 3.9
Name == "sse2.cvtdq2ps" || // Added in 7.0
Name == "sse2.cvtps2pd" || // Added in 3.9
Name == "avx.cvtdq2.pd.256" || // Added in 3.9
Name == "avx.cvtdq2.ps.256" || // Added in 7.0
Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
Name.startswith("vcvtph2ps.") || // Added in 11.0
Name.startswith("avx.vinsertf128.") || // Added in 3.7
Name == "avx2.vinserti128" || // Added in 3.7
Name.startswith("avx512.mask.insert") || // Added in 4.0
Name.startswith("avx.vextractf128.") || // Added in 3.7
Name == "avx2.vextracti128" || // Added in 3.7
Name.startswith("avx512.mask.vextract") || // Added in 4.0
Name.startswith("sse4a.movnt.") || // Added in 3.9
Name.startswith("avx.movnt.") || // Added in 3.2
Name.startswith("avx512.storent.") || // Added in 3.9
Name == "sse41.movntdqa" || // Added in 5.0
Name == "avx2.movntdqa" || // Added in 5.0
Name == "avx512.movntdqa" || // Added in 5.0
Name == "sse2.storel.dq" || // Added in 3.9
Name.startswith("sse.storeu.") || // Added in 3.9
Name.startswith("sse2.storeu.") || // Added in 3.9
Name.startswith("avx.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.storeu.") || // Added in 3.9
Name.startswith("avx512.mask.store.p") || // Added in 3.9
Name.startswith("avx512.mask.store.b.") || // Added in 3.9
Name.startswith("avx512.mask.store.w.") || // Added in 3.9
Name.startswith("avx512.mask.store.d.") || // Added in 3.9
Name.startswith("avx512.mask.store.q.") || // Added in 3.9
Name == "avx512.mask.store.ss" || // Added in 7.0
Name.startswith("avx512.mask.loadu.") || // Added in 3.9
Name.startswith("avx512.mask.load.") || // Added in 3.9
Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
Name.startswith("avx512.mask.expand.b") || // Added in 9.0
Name.startswith("avx512.mask.expand.w") || // Added in 9.0
Name.startswith("avx512.mask.expand.d") || // Added in 9.0
Name.startswith("avx512.mask.expand.q") || // Added in 9.0
Name.startswith("avx512.mask.expand.p") || // Added in 9.0
Name.startswith("avx512.mask.compress.b") || // Added in 9.0
Name.startswith("avx512.mask.compress.w") || // Added in 9.0
Name.startswith("avx512.mask.compress.d") || // Added in 9.0
Name.startswith("avx512.mask.compress.q") || // Added in 9.0
Name.startswith("avx512.mask.compress.p") || // Added in 9.0
Name == "sse42.crc32.64.8" || // Added in 3.4
Name.startswith("avx.vbroadcast.s") || // Added in 3.5
Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
Name.startswith("avx512.mask.palignr.") || // Added in 3.9
Name.startswith("avx512.mask.valign.") || // Added in 4.0
Name.startswith("sse2.psll.dq") || // Added in 3.7
Name.startswith("sse2.psrl.dq") || // Added in 3.7
Name.startswith("avx2.psll.dq") || // Added in 3.7
Name.startswith("avx2.psrl.dq") || // Added in 3.7
Name.startswith("avx512.psll.dq") || // Added in 3.9
Name.startswith("avx512.psrl.dq") || // Added in 3.9
Name == "sse41.pblendw" || // Added in 3.7
Name.startswith("sse41.blendp") || // Added in 3.7
Name.startswith("avx.blend.p") || // Added in 3.7
Name == "avx2.pblendw" || // Added in 3.7
Name.startswith("avx2.pblendd.") || // Added in 3.7
Name.startswith("avx.vbroadcastf128") || // Added in 4.0
Name == "avx2.vbroadcasti128" || // Added in 3.7
Name.startswith("avx512.mask.broadcastf32x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf64x2.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf32x8.") || // Added in 6.0
Name.startswith("avx512.mask.broadcastf64x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti32x4.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti64x2.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti32x8.") || // Added in 6.0
Name.startswith("avx512.mask.broadcasti64x4.") || // Added in 6.0
Name == "xop.vpcmov" || // Added in 3.8
Name == "xop.vpcmov.256" || // Added in 5.0
Name.startswith("avx512.mask.move.s") || // Added in 4.0
Name.startswith("avx512.cvtmask2") || // Added in 5.0
Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
Name.startswith("xop.vprot") || // Added in 8.0
Name.startswith("avx512.prol") || // Added in 8.0
Name.startswith("avx512.pror") || // Added in 8.0
Name.startswith("avx512.mask.prorv.") || // Added in 8.0
Name.startswith("avx512.mask.pror.") || // Added in 8.0
Name.startswith("avx512.mask.prolv.") || // Added in 8.0
Name.startswith("avx512.mask.prol.") || // Added in 8.0
Name.startswith("avx512.ptestm") || // Added in 6.0
Name.startswith("avx512.ptestnm") || // Added in 6.0
Name.startswith("avx512.mask.pavg")) // Added in 6.0
return true;
return false;
}
static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
Function *&NewFn) {
// Only handle intrinsics that start with "x86.".
if (!Name.startswith("x86."))
return false;
// Remove "x86." prefix.
Name = Name.substr(4);
if (ShouldUpgradeX86Intrinsic(F, Name)) {
NewFn = nullptr;
return true;
}
if (Name == "rdtscp") { // Added in 8.0
// If this intrinsic has 0 operands, it's the new version.
if (F->getFunctionType()->getNumParams() == 0)
return false;
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_rdtscp);
return true;
}
// SSE4.1 ptest functions may have an old signature.
if (Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestz, NewFn);
if (Name.substr(11) == "nzc")
return UpgradePTESTIntrinsic(F, Intrinsic::x86_sse41_ptestnzc, NewFn);
}
// Several blend and other instructions with masks used the wrong number of
// bits.
if (Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
if (Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
if (Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
if (Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
if (Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
if (Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
NewFn);
if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
NewFn);
if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
NewFn);
if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
NewFn);
if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
NewFn);
if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
NewFn);
// frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
if (Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Idx = F->getFunctionType()->getParamType(2);
if (Idx->isFPOrFPVectorTy()) {
rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
if (EltSize == 64 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2pd;
else if (EltSize == 32 && IdxSize == 128)
Permil2ID = Intrinsic::x86_xop_vpermil2ps;
else if (EltSize == 64 && IdxSize == 256)
Permil2ID = Intrinsic::x86_xop_vpermil2pd_256;
else
Permil2ID = Intrinsic::x86_xop_vpermil2ps_256;
NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
return true;
}
}
if (Name == "seh.recoverfp") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
return true;
}
return false;
}
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
// Quickly eliminate it, if it's not a candidate.
StringRef Name = F->getName();
if (Name.size() <= 8 || !Name.startswith("llvm."))
return false;
Name = Name.substr(5); // Strip off "llvm."
switch (Name[0]) {
default: break;
case 'a': {
if (Name.startswith("arm.rbit") || Name.startswith("aarch64.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.frintn")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::roundeven,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.rbit")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::bitreverse,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vclz")) {
Type* args[2] = {
F->arg_begin()->getType(),
Type::getInt1Ty(F->getContext())
};
// Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
// the end of the name. Change name from llvm.arm.neon.vclz.* to
// llvm.ctlz.*
FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
"llvm.ctlz." + Name.substr(14), F->getParent());
return true;
}
if (Name.startswith("arm.neon.vcnt")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
F->arg_begin()->getType());
return true;
}
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
if (vstRegex.match(Name)) {
static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
Intrinsic::arm_neon_vst2,
Intrinsic::arm_neon_vst3,
Intrinsic::arm_neon_vst4};
static const Intrinsic::ID StoreLaneInts[] = {
Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
Intrinsic::arm_neon_vst4lane
};
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
if (!Name.contains("lane"))
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreLaneInts[fArgs.size() - 5], Tys);
return true;
}
if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
return true;
}
if (Name.startswith("arm.neon.vqadds.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::sadd_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqaddu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::uadd_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqsubs.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ssub_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("arm.neon.vqsubu.")) {
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::usub_sat,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("aarch64.neon.addp")) {
if (F->arg_size() != 2)
break; // Invalid IR.
VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
if (Ty && Ty->getElementType()->isFloatingPointTy()) {
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::aarch64_neon_faddp, Ty);
return true;
}
}
// Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
// v16i8, respectively.
if ((Name.startswith("arm.neon.bfdot.") ||
Name.startswith("aarch64.neon.bfdot.")) &&
Name.endswith("i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Cases("arm.neon.bfdot.v2f32.v8i8",
"arm.neon.bfdot.v4f32.v16i8",
Intrinsic::arm_neon_bfdot)
.Cases("aarch64.neon.bfdot.v2f32.v8i8",
"aarch64.neon.bfdot.v4f32.v16i8",
Intrinsic::aarch64_neon_bfdot)
.Default(Intrinsic::not_intrinsic);
if (IID == Intrinsic::not_intrinsic)
break;
size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
LLVMContext &Ctx = F->getParent()->getContext();
std::array<Type *, 2> Tys {{
F->getReturnType(),
FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)
}};
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
// Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic anymore
// and accept v8bf16 instead of v16i8
if ((Name.startswith("arm.neon.bfm") ||
Name.startswith("aarch64.neon.bfm")) &&
Name.endswith(".v4f32.v16i8")) {
Intrinsic::ID IID =
StringSwitch<Intrinsic::ID>(Name)
.Case("arm.neon.bfmmla.v4f32.v16i8",
Intrinsic::arm_neon_bfmmla)
.Case("arm.neon.bfmlalb.v4f32.v16i8",
Intrinsic::arm_neon_bfmlalb)
.Case("arm.neon.bfmlalt.v4f32.v16i8",
Intrinsic::arm_neon_bfmlalt)
.Case("aarch64.neon.bfmmla.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmmla)
.Case("aarch64.neon.bfmlalb.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmlalb)
.Case("aarch64.neon.bfmlalt.v4f32.v16i8",
Intrinsic::aarch64_neon_bfmlalt)
.Default(Intrinsic::not_intrinsic);
if (IID == Intrinsic::not_intrinsic)
break;
std::array<Type *, 0> Tys;
NewFn = Intrinsic::getDeclaration(F->getParent(), IID, Tys);
return true;
}
if (Name == "arm.mve.vctp64" &&
cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
// A vctp64 returning a v4i1 is converted to return a v2i1. Rename the
// function and deal with it below in UpgradeIntrinsicCall.
rename(F);
return true;
}
// These too are changed to accept a v2i1 instead of the old v4i1.
if (Name == "arm.mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx3q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
return true;
if (Name == "amdgcn.alignbit") {
// Target-specific intrinsic became redundant.
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
{F->getReturnType()});
return true;
}
break;
}
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
break;
}
case 'd': {
if (Name == "dbg.value" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
return true;
}
break;
}
case 'e': {
if (Name.startswith("experimental.vector.extract.")) {
rename(F);
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_extract, Tys);
return true;
}
if (Name.startswith("experimental.vector.insert.")) {
rename(F);
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::vector_insert, Tys);
return true;
}
SmallVector<StringRef, 2> Groups;
static const Regex R("^experimental.vector.reduce.([a-z]+)\\.[a-z][0-9]+");
if (R.match(Name, &Groups)) {
Intrinsic::ID ID;
ID = StringSwitch<Intrinsic::ID>(Groups[1])
.Case("add", Intrinsic::vector_reduce_add)
.Case("mul", Intrinsic::vector_reduce_mul)
.Case("and", Intrinsic::vector_reduce_and)
.Case("or", Intrinsic::vector_reduce_or)
.Case("xor", Intrinsic::vector_reduce_xor)
.Case("smax", Intrinsic::vector_reduce_smax)
.Case("smin", Intrinsic::vector_reduce_smin)
.Case("umax", Intrinsic::vector_reduce_umax)
.Case("umin", Intrinsic::vector_reduce_umin)
.Case("fmax", Intrinsic::vector_reduce_fmax)
.Case("fmin", Intrinsic::vector_reduce_fmin)
.Default(Intrinsic::not_intrinsic);
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, {Args[0]});
return true;
}
}
static const Regex R2(
"^experimental.vector.reduce.v2.([a-z]+)\\.[fi][0-9]+");
Groups.clear();
if (R2.match(Name, &Groups)) {
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (Groups[1] == "fadd")
ID = Intrinsic::vector_reduce_fadd;
if (Groups[1] == "fmul")
ID = Intrinsic::vector_reduce_fmul;
if (ID != Intrinsic::not_intrinsic) {
rename(F);
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[1]};
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return true;
}
}
break;
}
case 'i':
case 'l': {
bool IsLifetimeStart = Name.startswith("lifetime.start");
if (IsLifetimeStart || Name.startswith("invariant.start")) {
Intrinsic::ID ID = IsLifetimeStart ?
Intrinsic::lifetime_start : Intrinsic::invariant_start;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[1]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
bool IsLifetimeEnd = Name.startswith("lifetime.end");
if (IsLifetimeEnd || Name.startswith("invariant.end")) {
Intrinsic::ID ID = IsLifetimeEnd ?
Intrinsic::lifetime_end : Intrinsic::invariant_end;
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[IsLifetimeEnd ? 1 : 2]};
if (F->getName() != Intrinsic::getName(ID, ObjectPtr, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
return true;
}
}
if (Name.startswith("invariant.group.barrier")) {
// Rename invariant.group.barrier to launder.invariant.group
auto Args = F->getFunctionType()->params();
Type* ObjectPtr[1] = {Args[0]};
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::launder_invariant_group, ObjectPtr);
return true;
}
break;
}
case 'm': {
if (Name.startswith("masked.load.")) {
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_load, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_load,
Tys);
return true;
}
}
if (Name.startswith("masked.store.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = { Args[0], Args[1] };
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_store, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_store,
Tys);
return true;
}
}
// Rename gather/scatter intrinsics with no address space overloading to the
// new overload, which includes an address space.
if (Name.startswith("masked.gather.")) {
Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_gather, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_gather, Tys);
return true;
}
}
if (Name.startswith("masked.scatter.")) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = {Args[0], Args[1]};
if (F->getName() !=
Intrinsic::getName(Intrinsic::masked_scatter, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_scatter, Tys);
return true;
}
}
// Update the memory intrinsics (memcpy/memmove/memset) that have an
// alignment parameter to instead embed the alignment as an attribute of
// their pointer args.
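// For example, an old-style declaration such as
//   declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
// (with an explicit i32 alignment operand) is mapped to the four-operand
// form, with the alignment expressed as align attributes on the pointer
// arguments instead.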
if (Name.startswith("memcpy.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, src, and len
ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memcpy,
ParamTypes);
return true;
}
if (Name.startswith("memmove.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, src, and len
ArrayRef<Type *> ParamTypes = F->getFunctionType()->params().slice(0, 3);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memmove,
ParamTypes);
return true;
}
if (Name.startswith("memset.") && F->arg_size() == 5) {
rename(F);
// Get the types of dest, and len
const auto *FT = F->getFunctionType();
Type *ParamTypes[2] = {
FT->getParamType(0), // Dest
FT->getParamType(2) // len
};
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::memset,
ParamTypes);
return true;
}
break;
}
case 'n': {
if (Name.startswith("nvvm.")) {
Name = Name.substr(5);
// The following nvvm intrinsics correspond exactly to an LLVM intrinsic.
Intrinsic::ID IID = StringSwitch<Intrinsic::ID>(Name)
.Cases("brev32", "brev64", Intrinsic::bitreverse)
.Case("clz.i", Intrinsic::ctlz)
.Case("popc.i", Intrinsic::ctpop)
.Default(Intrinsic::not_intrinsic);
if (IID != Intrinsic::not_intrinsic && F->arg_size() == 1) {
NewFn = Intrinsic::getDeclaration(F->getParent(), IID,
{F->getReturnType()});
return true;
}
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
// TODO: We could add lohi.i2d.
bool Expand = StringSwitch<bool>(Name)
.Cases("abs.i", "abs.ll", true)
.Cases("clz.ll", "popc.ll", "h2f", true)
.Cases("max.i", "max.ll", "max.ui", "max.ull", true)
.Cases("min.i", "min.ll", "min.ui", "min.ull", true)
.StartsWith("atomic.load.add.f32.p", true)
.StartsWith("atomic.load.add.f64.p", true)
.Default(false);
if (Expand) {
NewFn = nullptr;
return true;
}
}
break;
}
case 'o':
// We only need to change the name to match the mangling including the
// address space.
if (Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->arg_size() == 2 || F->arg_size() == 3 ||
F->getName() !=
Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
Tys);
return true;
}
}
break;
case 'p':
if (Name == "prefetch") {
// Handle address space overloading.
Type *Tys[] = {F->arg_begin()->getType()};
if (F->getName() !=
Intrinsic::getName(Intrinsic::prefetch, Tys, F->getParent())) {
rename(F);
NewFn =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
return true;
}
} else if (Name.startswith("ptr.annotation.") && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ptr_annotation,
F->arg_begin()->getType());
return true;
}
break;
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
return true;
}
break;
case 'v': {
if (Name == "var.annotation" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::var_annotation);
return true;
}
break;
}
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
}
auto *ST = dyn_cast<StructType>(F->getReturnType());
if (ST && (!ST->isLiteral() || ST->isPacked())) {
// Replace return type with literal non-packed struct. Only do this for
// intrinsics declared to return a struct, not for intrinsics with
// overloaded return type, in which case the exact struct type will be
// mangled into the name.
SmallVector<Intrinsic::IITDescriptor> Desc;
Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
auto *FT = F->getFunctionType();
auto *NewST = StructType::get(ST->getContext(), ST->elements());
auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
std::string Name = F->getName().str();
rename(F);
NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
Name, F->getParent());
// The new function may also need remangling.
- if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F))
+ if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
NewFn = *Result;
return true;
}
}
// Remangle our intrinsic since we upgraded the mangling.
auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
if (Result != None) {
NewFn = *Result;
return true;
}
// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the
// upgraded form of the intrinsic. We should perhaps have two separate
// functions for this.
return false;
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
assert(F != NewFn && "Intrinsic function upgraded to the same function");
// Upgrade intrinsic attributes. This does not change the function.
if (NewFn)
F = NewFn;
if (Intrinsic::ID id = F->getIntrinsicID())
F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
return Upgraded;
}
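// Upgrade two-element { i32, void ()* } entries of llvm.global_ctors /
// llvm.global_dtors to the three-element form by appending a null i8*
// "associated data" field to each entry.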
GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
GV->getName() == "llvm.global_dtors")) ||
!GV->hasInitializer())
return nullptr;
ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
if (!ATy)
return nullptr;
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
if (!STy || STy->getNumElements() != 2)
return nullptr;
LLVMContext &C = GV->getContext();
IRBuilder<> IRB(C);
auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
IRB.getInt8PtrTy());
Constant *Init = GV->getInitializer();
unsigned N = Init->getNumOperands();
std::vector<Constant *> NewCtors(N);
for (unsigned i = 0; i != N; ++i) {
auto Ctor = cast<Constant>(Init->getOperand(i));
NewCtors[i] = ConstantStruct::get(
EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
Constant::getNullValue(IRB.getInt8PtrTy()));
}
Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
NewInit, GV->getName());
}
// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
Value *Op, unsigned Shift) {
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
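// For example, for a single 128-bit lane (NumElts == 16) and Shift == 4 the
// mask selects four zero bytes followed by source bytes 0..11, i.e. a left
// shift of the lane by four bytes.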
if (Shift < 16) {
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = NumElts + i - Shift;
if (Idx < NumElts)
Idx -= NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Res, Op, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
unsigned Shift) {
auto *ResultTy = cast<FixedVectorType>(Op->getType());
unsigned NumElts = ResultTy->getNumElements() * 8;
// Bitcast from a 64-bit element type to a byte element type.
Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
Op = Builder.CreateBitCast(Op, VecTy, "cast");
// We'll be shuffling in zeroes.
Value *Res = Constant::getNullValue(VecTy);
// If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
// we'll just return the zero vector.
if (Shift < 16) {
int Idxs[64];
// 256/512-bit version is split into 2/4 16-byte lanes.
for (unsigned l = 0; l != NumElts; l += 16)
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = i + Shift;
if (Idx >= 16)
Idx += NumElts - 16; // end of lane, switch operand.
Idxs[l + i] = Idx + l;
}
Res = Builder.CreateShuffleVector(Op, Res, makeArrayRef(Idxs, NumElts));
}
// Bitcast back to a 64-bit element type.
return Builder.CreateBitCast(Res, ResultTy, "cast");
}
static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
unsigned NumElts) {
assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
llvm::VectorType *MaskTy = FixedVectorType::get(
Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
// If we have fewer than 8 elements (1, 2 or 4), then the starting mask was an
// i8 and we need to extract down to the right number of elements.
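// For example, an i8 mask with value 0b00000101 and NumElts == 4 becomes
// <i1 1, i1 0, i1 1, i1 0>, since bit i of the mask maps to element i.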
if (NumElts <= 4) {
int Indices[4];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
Mask = Builder.CreateShuffleVector(
Mask, Mask, makeArrayRef(Indices, NumElts), "extract");
}
return Mask;
}
static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
Mask = getX86MaskVec(Builder, Mask,
cast<FixedVectorType>(Op0->getType())->getNumElements());
return Builder.CreateSelect(Mask, Op0, Op1);
}
static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
Value *Op0, Value *Op1) {
// If the mask is all ones just emit the first operation.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Op0;
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
Mask->getType()->getIntegerBitWidth());
Mask = Builder.CreateBitCast(Mask, MaskTy);
Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
return Builder.CreateSelect(Mask, Op0, Op1);
}
// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting, while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
Value *Op1, Value *Shift,
Value *Passthru, Value *Mask,
bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
// Mask the immediate for VALIGN.
if (IsVALIGN)
ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
if (ShiftVal >= 32)
return llvm::Constant::getNullValue(Op0->getType());
// If palignr is shifting the pair of input vectors more than one lane,
// but less than two lanes, convert to shifting in zeroes.
if (ShiftVal > 16) {
ShiftVal -= 16;
Op1 = Op0;
Op0 = llvm::Constant::getNullValue(Op0->getType());
}
int Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
}
Value *Align = Builder.CreateShuffleVector(Op1, Op0,
makeArrayRef(Indices, NumElts),
"palignr");
return EmitX86Select(Builder, Mask, Align, Passthru);
}
static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
bool ZeroMask, bool IndexForm) {
Type *Ty = CI.getType();
unsigned VecWidth = Ty->getPrimitiveSizeInBits();
unsigned EltWidth = Ty->getScalarSizeInBits();
bool IsFloat = Ty->isFPOrFPVectorTy();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_128;
else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_128;
else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_256;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_d_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_vpermi2var_q_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
CI.getArgOperand(2) };
// If this isn't index form we need to swap operand 0 and 1.
if (!IndexForm)
std::swap(Args[0], Args[1]);
Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
: Builder.CreateBitCast(CI.getArgOperand(1),
Ty);
return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}
static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Type *Ty = CI.getType();
Value *Op0 = CI.getOperand(0);
Value *Op1 = CI.getOperand(1);
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
bool IsRotateRight) {
Type *Ty = CI.getType();
Value *Src = CI.getArgOperand(0);
Value *Amt = CI.getArgOperand(1);
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated as modulo, and the types are all powers of
// 2, so we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
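// A rotate is a funnel shift with both vector operands equal, so fshl/fshr
// with Src repeated implements rotl/rotr.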
Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
bool IsSigned) {
Type *Ty = CI.getType();
Value *LHS = CI.getArgOperand(0);
Value *RHS = CI.getArgOperand(1);
CmpInst::Predicate Pred;
switch (Imm) {
case 0x0:
Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
break;
case 0x1:
Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
break;
case 0x2:
Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
break;
case 0x3:
Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
break;
case 0x4:
Pred = ICmpInst::ICMP_EQ;
break;
case 0x5:
Pred = ICmpInst::ICMP_NE;
break;
case 0x6:
return Constant::getNullValue(Ty); // FALSE
case 0x7:
return Constant::getAllOnesValue(Ty); // TRUE
default:
llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
}
Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
Value *Ext = Builder.CreateSExt(Cmp, Ty);
return Ext;
}
static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
bool IsShiftRight, bool ZeroMask) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Amt = CI.getArgOperand(2);
if (IsShiftRight)
std::swap(Op0, Op1);
// The amount may be a scalar immediate, in which case create a splat vector.
// Funnel shift amounts are treated as modulo, and the types are all powers of
// 2, so we only care about the lowest log2 bits anyway.
if (Amt->getType() != Ty) {
unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
Amt = Builder.CreateVectorSplat(NumElts, Amt);
}
Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
unsigned NumArgs = CI.arg_size();
if (NumArgs >= 4) { // For masked intrinsics.
Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
CI.getArgOperand(0);
Value *Mask = CI.getOperand(NumArgs - 1);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
}
return Res;
}
static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
Value *Ptr, Value *Data, Value *Mask,
bool Aligned) {
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr,
llvm::PointerType::getUnqual(Data->getType()));
const Align Alignment =
Aligned
? Align(Data->getType()->getPrimitiveSizeInBits().getFixedSize() / 8)
: Align(1);
// If the mask is all ones just emit a regular store.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedStore(Data, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}
static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
Value *Ptr, Value *Passthru, Value *Mask,
bool Aligned) {
Type *ValTy = Passthru->getType();
// Cast the pointer to the right type.
Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
const Align Alignment =
Aligned
? Align(Passthru->getType()->getPrimitiveSizeInBits().getFixedSize() /
8)
: Align(1);
// If the mask is all ones just emit a regular load.
if (const auto *C = dyn_cast<Constant>(Mask))
if (C->isAllOnesValue())
return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
// Convert the mask from an integer type to a vector of i1.
unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
Mask = getX86MaskVec(Builder, Mask, NumElts);
return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}
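// Upgrade X86 pabs intrinsics to the generic llvm.abs intrinsic. The i1
// false operand means INT_MIN is not treated as poison; the masked forms
// select the result against the passthru operand.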
static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
Type *Ty = CI.getType();
Value *Op0 = CI.getArgOperand(0);
Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
if (CI.arg_size() == 3)
Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
Type *Ty = CI.getType();
// Arguments have a vXi32 type so cast to vXi64.
Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
if (IsSigned) {
// Shift left then arithmetic shift right.
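// Shifting each 64-bit lane left by 32 and then arithmetic-shifting it back
// sign-extends the low 32 bits of the lane.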
Constant *ShiftAmt = ConstantInt::get(Ty, 32);
LHS = Builder.CreateShl(LHS, ShiftAmt);
LHS = Builder.CreateAShr(LHS, ShiftAmt);
RHS = Builder.CreateShl(RHS, ShiftAmt);
RHS = Builder.CreateAShr(RHS, ShiftAmt);
} else {
// Clear the upper bits.
Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
LHS = Builder.CreateAnd(LHS, Mask);
RHS = Builder.CreateAnd(RHS, Mask);
}
Value *Res = Builder.CreateMul(LHS, RHS);
if (CI.arg_size() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
}
// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
Value *Mask) {
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
if (Mask) {
const auto *C = dyn_cast<Constant>(Mask);
if (!C || !C->isAllOnesValue())
Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
}
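// Pad i1 vectors narrower than 8 elements with zero lanes so the bitcast
// below produces at least an i8.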
if (NumElts < 8) {
int Indices[8];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
for (unsigned i = NumElts; i != 8; ++i)
Indices[i] = NumElts + i % NumElts;
Vec = Builder.CreateShuffleVector(Vec,
Constant::getNullValue(Vec->getType()),
Indices);
}
return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}
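// Upgrade masked integer compare intrinsics. CC selects the predicate
// (3 = always false, 7 = always true); the i1 result is masked and widened to
// at least 8 bits.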
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
Value *Cmp;
if (CC == 3) {
Cmp = Constant::getNullValue(
FixedVectorType::get(Builder.getInt1Ty(), NumElts));
} else if (CC == 7) {
Cmp = Constant::getAllOnesValue(
FixedVectorType::get(Builder.getInt1Ty(), NumElts));
} else {
ICmpInst::Predicate Pred;
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case 0: Pred = ICmpInst::ICMP_EQ; break;
case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
case 4: Pred = ICmpInst::ICMP_NE; break;
case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
}
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
Intrinsic::ID IID) {
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
Value *Rep = Builder.CreateCall(Intrin,
{ CI.getArgOperand(0), CI.getArgOperand(1) });
return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}
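// Upgrade a masked scalar move: select element 0 of B or Src based on the low
// bit of the mask and insert the result into element 0 of A.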
static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
Value* A = CI.getArgOperand(0);
Value* B = CI.getArgOperand(1);
Value* Src = CI.getArgOperand(2);
Value* Mask = CI.getArgOperand(3);
Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
Value* Cmp = Builder.CreateIsNotNull(AndNode);
Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}
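// Upgrade vpmovm2* style intrinsics: expand the scalar mask to a vector of i1
// and sign-extend it to the result type.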
static Value* UpgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
Value* Op = CI.getArgOperand(0);
Type* ReturnOp = CI.getType();
unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
Value *Mask = getX86MaskVec(Builder, Op, NumElts);
return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}
// Replace intrinsic with unmasked version and a select.
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
CallBase &CI, Value *&Rep) {
Name = Name.substr(12); // Remove avx512.mask.
unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI.getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (Name.startswith("max.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_max_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_max_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_max_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_max_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("min.p")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_sse_min_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_sse2_min_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_min_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_min_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pshuf.b.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pshuf_b_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pshuf_b;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pshuf_b_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmul.hr.sw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmul_hr_sw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmulh.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulh_w;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmulh_w;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmulh_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmulhu.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmulhu_w;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmulhu_w;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmulhu_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmaddw.d.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_pmadd_wd;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmadd_wd;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmaddw_d_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmaddubs.w.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_pmadd_ub_sw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmaddubs_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packsswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packsswb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packsswb;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packsswb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packssdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packssdw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packssdw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packssdw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packuswb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse2_packuswb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packuswb;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packuswb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("packusdw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_sse41_packusdw;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx2_packusdw;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_packusdw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("vpermilvar.")) {
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx_vpermilvar_pd;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx_vpermilvar_ps_256;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx_vpermilvar_pd_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name == "cvtpd2dq.256") {
IID = Intrinsic::x86_avx_cvt_pd2dq_256;
} else if (Name == "cvtpd2ps.256") {
IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
} else if (Name == "cvttpd2dq.256") {
IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
} else if (Name == "cvttps2dq.128") {
IID = Intrinsic::x86_sse2_cvttps2dq;
} else if (Name == "cvttps2dq.256") {
IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
} else if (Name.startswith("permvar.")) {
bool IsFloat = CI.getType()->isFPOrFPVectorTy();
if (VecWidth == 256 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx2_permps;
else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx2_permd;
else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_df_256;
else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_di_256;
else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_sf_512;
else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_si_512;
else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
IID = Intrinsic::x86_avx512_permvar_df_512;
else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
IID = Intrinsic::x86_avx512_permvar_di_512;
else if (VecWidth == 128 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_128;
else if (VecWidth == 256 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_256;
else if (VecWidth == 512 && EltWidth == 16)
IID = Intrinsic::x86_avx512_permvar_hi_512;
else if (VecWidth == 128 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_128;
else if (VecWidth == 256 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_256;
else if (VecWidth == 512 && EltWidth == 8)
IID = Intrinsic::x86_avx512_permvar_qi_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("dbpsadbw.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_dbpsadbw_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx512_dbpsadbw_256;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_dbpsadbw_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pmultishift.qb.")) {
if (VecWidth == 128)
IID = Intrinsic::x86_avx512_pmultishift_qb_128;
else if (VecWidth == 256)
IID = Intrinsic::x86_avx512_pmultishift_qb_256;
else if (VecWidth == 512)
IID = Intrinsic::x86_avx512_pmultishift_qb_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("conflict.")) {
if (Name[9] == 'd' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_d_128;
else if (Name[9] == 'd' && VecWidth == 256)
IID = Intrinsic::x86_avx512_conflict_d_256;
else if (Name[9] == 'd' && VecWidth == 512)
IID = Intrinsic::x86_avx512_conflict_d_512;
else if (Name[9] == 'q' && VecWidth == 128)
IID = Intrinsic::x86_avx512_conflict_q_128;
else if (Name[9] == 'q' && VecWidth == 256)
IID = Intrinsic::x86_avx512_conflict_q_256;
else if (Name[9] == 'q' && VecWidth == 512)
IID = Intrinsic::x86_avx512_conflict_q_512;
else
llvm_unreachable("Unexpected intrinsic");
} else if (Name.startswith("pavg.")) {
if (Name[5] == 'b' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 256)
IID = Intrinsic::x86_avx2_pavg_b;
else if (Name[5] == 'b' && VecWidth == 512)
IID = Intrinsic::x86_avx512_pavg_b_512;
else if (Name[5] == 'w' && VecWidth == 128)
IID = Intrinsic::x86_sse2_pavg_w;
else if (Name[5] == 'w' && VecWidth == 256)
IID = Intrinsic::x86_avx2_pavg_w;
else if (Name[5] == 'w' && VecWidth == 512)
IID = Intrinsic::x86_avx512_pavg_w_512;
else
llvm_unreachable("Unexpected intrinsic");
} else
return false;
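// The masked forms carry the passthru and mask as the last two operands; drop
// them, call the unmasked intrinsic, then re-apply the mask with a select.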
SmallVector<Value *, 4> Args(CI.args());
Args.pop_back();
Args.pop_back();
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
unsigned NumArgs = CI.arg_size();
Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
CI.getArgOperand(NumArgs - 2));
return true;
}
/// Upgrade the comment in a call to inline asm that represents an ObjC
/// retain/release marker.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
size_t Pos;
if (AsmStr->find("mov\tfp") == 0 &&
AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
(Pos = AsmStr->find("# marker")) != std::string::npos) {
AsmStr->replace(Pos, 1, ";");
}
}
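// Upgrade old ARM MVE/CDE intrinsics that used v4i1 predicates for 64-bit
// element types to the new v2i1 forms, inserting predicate i2v/v2i conversions
// where needed.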
static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
IRBuilder<> &Builder) {
if (Name == "mve.vctp64.old") {
// Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
// correct type.
Value *VCTP = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::arm_mve_vctp64),
CI->getArgOperand(0), CI->getName());
Value *C1 = Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_v2i,
{VectorType::get(Builder.getInt1Ty(), 2, false)}),
VCTP);
return Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_i2v,
{VectorType::get(Builder.getInt1Ty(), 4, false)}),
C1);
} else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
std::vector<Type *> Tys;
unsigned ID = CI->getIntrinsicID();
Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
switch (ID) {
case Intrinsic::arm_mve_mull_int_predicated:
case Intrinsic::arm_mve_vqdmull_predicated:
case Intrinsic::arm_mve_vldr_gather_base_predicated:
Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
break;
case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
case Intrinsic::arm_mve_vstr_scatter_base_predicated:
case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
V2I1Ty};
break;
case Intrinsic::arm_mve_vldr_gather_offset_predicated:
Tys = {CI->getType(), CI->getOperand(0)->getType(),
CI->getOperand(1)->getType(), V2I1Ty};
break;
case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
CI->getOperand(2)->getType(), V2I1Ty};
break;
case Intrinsic::arm_cde_vcx1q_predicated:
case Intrinsic::arm_cde_vcx1qa_predicated:
case Intrinsic::arm_cde_vcx2q_predicated:
case Intrinsic::arm_cde_vcx2qa_predicated:
case Intrinsic::arm_cde_vcx3q_predicated:
case Intrinsic::arm_cde_vcx3qa_predicated:
Tys = {CI->getOperand(1)->getType(), V2I1Ty};
break;
default:
llvm_unreachable("Unhandled Intrinsic!");
}
std::vector<Value *> Ops;
for (Value *Op : CI->args()) {
Type *Ty = Op->getType();
if (Ty->getScalarSizeInBits() == 1) {
Value *C1 = Builder.CreateCall(
Intrinsic::getDeclaration(
F->getParent(), Intrinsic::arm_mve_pred_v2i,
{VectorType::get(Builder.getInt1Ty(), 4, false)}),
Op);
Op = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(),
Intrinsic::arm_mve_pred_i2v, {V2I1Ty}),
C1);
}
Ops.push_back(Op);
}
Function *Fn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
return Builder.CreateCall(Fn, Ops, CI->getName());
}
llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided so that the replacement integrates seamlessly with the existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
assert(F && "Intrinsic call is not direct?");
if (!NewFn) {
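// No replacement declaration was provided, so rewrite the call in place based
// on the intrinsic's name; strip the "llvm." prefix and any target prefix to
// simplify the matching below.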
// Get the Function's name.
StringRef Name = F->getName();
assert(Name.startswith("llvm.") && "Intrinsic doesn't start with 'llvm.'");
Name = Name.substr(5);
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
bool IsNVVM = Name.startswith("nvvm.");
if (IsNVVM)
Name = Name.substr(5);
bool IsARM = Name.startswith("arm.");
if (IsARM)
Name = Name.substr(4);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Nontemporal (unaligned) store of the 0th element of the float/double
// vector.
Type *SrcEltTy = cast<VectorType>(Arg1->getType())->getElementType();
PointerType *EltPtrTy = PointerType::getUnqual(SrcEltTy);
Value *Addr = Builder.CreateBitCast(Arg0, EltPtrTy, "cast");
Value *Extract =
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx.movnt.") ||
Name.startswith("avx512.storent."))) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
MDNode *Node = MDNode::get(C, Elts);
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
StoreInst *SI = Builder.CreateAlignedStore(
Arg1, BC,
Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && Name == "sse2.storel.dq") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
Value *BC = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Elt->getType()),
"cast");
Builder.CreateAlignedStore(Elt, BC, Align(1));
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("sse.storeu.") ||
Name.startswith("sse2.storeu.") ||
Name.startswith("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Arg0 = Builder.CreateBitCast(Arg0,
PointerType::getUnqual(Arg1->getType()),
"cast");
Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && Name == "avx512.mask.store.ss") {
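// Scalar store: only the low bit of the mask is meaningful.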
Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
Mask, false);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
if (IsX86 && (Name.startswith("avx512.mask.store"))) {
// "avx512.mask.storeu." or "avx512.mask.store."
bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
// Remove intrinsic.
CI->eraseFromParent();
return;
}
Value *Rep;
// Upgrade packed integer vector compare intrinsics to compare instructions.
if (IsX86 && (Name.startswith("sse2.pcmp") ||
Name.startswith("avx2.pcmp"))) {
// "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
bool CmpEq = Name[9] == 'e';
Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
CI->getArgOperand(0), CI->getArgOperand(1));
Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name.startswith("avx512.broadcastm"))) {
Type *ExtTy = Type::getInt32Ty(C);
if (CI->getOperand(0)->getType()->isIntegerTy(8))
ExtTy = Type::getInt64Ty(C);
unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
ExtTy->getPrimitiveSizeInBits();
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
Rep = Builder.CreateVectorSplat(NumElts, Rep);
} else if (IsX86 && (Name == "sse.sqrt.ss" ||
Name == "sse2.sqrt.sd")) {
Value *Vec = CI->getArgOperand(0);
Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
Function *Intr = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt, Elt0->getType());
Elt0 = Builder.CreateCall(Intr, Elt0);
Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
Name.startswith("sse2.sqrt.p") ||
Name.startswith("sse.sqrt.p"))) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt,
CI->getType()),
{CI->getArgOperand(0)});
} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
: Intrinsic::x86_avx512_sqrt_pd_512;
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
IID), Args);
} else {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::sqrt,
CI->getType()),
{CI->getArgOperand(0)});
}
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.ptestm") ||
Name.startswith("avx512.ptestnm"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
Rep = Builder.CreateAnd(Op0, Op1);
llvm::Type *Ty = Op0->getType();
Value *Zero = llvm::Constant::getNullValue(Ty);
ICmpInst::Predicate Pred =
Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
Rep = Builder.CreateICmp(Pred, Rep, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
} else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
->getNumElements();
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
unsigned NumElts = CI->getType()->getScalarSizeInBits();
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
int Indices[64];
for (unsigned i = 0; i != NumElts; ++i)
Indices[i] = i;
// First extract half of each vector. This gives better codegen than
// doing it in a single shuffle.
LHS = Builder.CreateShuffleVector(LHS, LHS,
makeArrayRef(Indices, NumElts / 2));
RHS = Builder.CreateShuffleVector(RHS, RHS,
makeArrayRef(Indices, NumElts / 2));
// Concat the vectors.
// NOTE: Operands have to be swapped to match intrinsic definition.
Rep = Builder.CreateShuffleVector(RHS, LHS,
makeArrayRef(Indices, NumElts));
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kand.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateAnd(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kandn.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
LHS = Builder.CreateNot(LHS);
Rep = Builder.CreateAnd(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateOr(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kxor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateXor(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.kxnor.w") {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
LHS = Builder.CreateNot(LHS);
Rep = Builder.CreateXor(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 && Name == "avx512.knot.w") {
Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Rep = Builder.CreateNot(Rep);
Rep = Builder.CreateBitCast(Rep, CI->getType());
} else if (IsX86 &&
(Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w")) {
Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
Rep = Builder.CreateOr(LHS, RHS);
Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
Value *C;
if (Name[14] == 'c')
C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
else
C = ConstantInt::getNullValue(Builder.getInt16Ty());
Rep = Builder.CreateICmpEQ(Rep, C);
Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
Name == "sse.div.ss" || Name == "sse2.div.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
ConstantInt::get(I32Ty, 0));
Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
ConstantInt::get(I32Ty, 0));
Value *EltOp;
if (Name.contains(".add."))
EltOp = Builder.CreateFAdd(Elt0, Elt1);
else if (Name.contains(".sub."))
EltOp = Builder.CreateFSub(Elt0, Elt1);
else if (Name.contains(".mul."))
EltOp = Builder.CreateFMul(Elt0, Elt1);
else
EltOp = Builder.CreateFDiv(Elt0, Elt1);
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
ConstantInt::get(I32Ty, 0));
} else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
// "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
bool CmpEq = Name[16] == 'e';
Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
} else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
Intrinsic::ID IID;
switch (VecWidth) {
default: llvm_unreachable("Unexpected intrinsic");
case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
}
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
Type *OpTy = CI->getArgOperand(0)->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_fpclass_ps_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_fpclass_ps_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_fpclass_ps_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_fpclass_pd_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_fpclass_pd_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_fpclass_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.cmp.p")) {
SmallVector<Value *, 4> Args(CI->args());
Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
else
llvm_unreachable("Unexpected intrinsic");
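// The replacement mask.cmp intrinsics take an explicit all-ones mask; for the
// 512-bit forms the old trailing operand (SAE) must remain last, so the mask
// is swapped in ahead of it.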
Value *Mask = Constant::getAllOnesValue(CI->getType());
if (VecWidth == 512)
std::swap(Mask, Args.back());
Args.push_back(Mask);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Args);
} else if (IsX86 && Name.startswith("avx512.mask.cmp.")) {
// Integer compare intrinsics.
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
} else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
} else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
Name.startswith("avx512.cvtw2mask.") ||
Name.startswith("avx512.cvtd2mask.") ||
Name.startswith("avx512.cvtq2mask."))) {
Value *Op = CI->getArgOperand(0);
Value *Zero = llvm::Constant::getNullValue(Op->getType());
Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
} else if (IsX86 && (Name == "ssse3.pabs.b.128" ||
Name == "ssse3.pabs.w.128" ||
Name == "ssse3.pabs.d.128" ||
Name.startswith("avx2.pabs") ||
Name.startswith("avx512.mask.pabs"))) {
Rep = upgradeAbs(Builder, *CI);
} else if (IsX86 && (Name == "sse41.pmaxsb" ||
Name == "sse2.pmaxs.w" ||
Name == "sse41.pmaxsd" ||
Name.startswith("avx2.pmaxs") ||
Name.startswith("avx512.mask.pmaxs"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
} else if (IsX86 && (Name == "sse2.pmaxu.b" ||
Name == "sse41.pmaxuw" ||
Name == "sse41.pmaxud" ||
Name.startswith("avx2.pmaxu") ||
Name.startswith("avx512.mask.pmaxu"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
} else if (IsX86 && (Name == "sse41.pminsb" ||
Name == "sse2.pmins.w" ||
Name == "sse41.pminsd" ||
Name.startswith("avx2.pmins") ||
Name.startswith("avx512.mask.pmins"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
} else if (IsX86 && (Name == "sse2.pminu.b" ||
Name == "sse41.pminuw" ||
Name == "sse41.pminud" ||
Name.startswith("avx2.pminu") ||
Name.startswith("avx512.mask.pminu"))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
} else if (IsX86 && (Name == "sse2.pmulu.dq" ||
Name == "avx2.pmulu.dq" ||
Name == "avx512.pmulu.dq.512" ||
Name.startswith("avx512.mask.pmulu.dq."))) {
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
} else if (IsX86 && (Name == "sse41.pmuldq" ||
Name == "avx2.pmul.dq" ||
Name == "avx512.pmul.dq.512" ||
Name.startswith("avx512.mask.pmul.dq."))) {
Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
} else if (IsX86 && (Name == "sse.cvtsi2ss" ||
Name == "sse2.cvtsi2sd" ||
Name == "sse.cvtsi642ss" ||
Name == "sse2.cvtsi642sd")) {
Rep = Builder.CreateSIToFP(
CI->getArgOperand(1),
cast<VectorType>(CI->getType())->getElementType());
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
} else if (IsX86 && Name == "avx512.cvtusi2sd") {
Rep = Builder.CreateUIToFP(
CI->getArgOperand(1),
cast<VectorType>(CI->getType())->getElementType());
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
} else if (IsX86 && Name == "sse2.cvtss2sd") {
Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
Rep = Builder.CreateFPExt(
Rep, cast<VectorType>(CI->getType())->getElementType());
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtdq2ps" ||
Name == "avx.cvtdq2.pd.256" ||
Name == "avx.cvtdq2.ps.256" ||
Name.startswith("avx512.mask.cvtdq2pd.") ||
Name.startswith("avx512.mask.cvtudq2pd.") ||
Name.startswith("avx512.mask.cvtdq2ps.") ||
Name.startswith("avx512.mask.cvtudq2ps.") ||
Name.startswith("avx512.mask.cvtqq2pd.") ||
Name.startswith("avx512.mask.cvtuqq2pd.") ||
Name == "avx512.mask.cvtqq2ps.256" ||
Name == "avx512.mask.cvtqq2ps.512" ||
Name == "avx512.mask.cvtuqq2ps.256" ||
Name == "avx512.mask.cvtuqq2ps.512" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvt.ps2.pd.256" ||
Name == "avx512.mask.cvtps2pd.128" ||
Name == "avx512.mask.cvtps2pd.256")) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts < SrcTy->getNumElements()) {
assert(NumDstElts == 2 && "Unexpected vector size");
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
}
bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
if (IsPS2PD)
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
else if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
: Intrinsic::x86_avx512_sitofp_round;
Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
{ DstTy, SrcTy });
Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
} else {
Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
}
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
Name.startswith("vcvtph2ps."))) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
Rep = CI->getArgOperand(0);
auto *SrcTy = cast<FixedVectorType>(Rep->getType());
unsigned NumDstElts = DstTy->getNumElements();
if (NumDstElts != SrcTy->getNumElements()) {
assert(NumDstElts == 4 && "Unexpected vector size");
Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
}
Rep = Builder.CreateBitCast(
Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.load")) {
// "avx512.mask.loadu." or "avx512.mask.load."
bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
Rep =
UpgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), Aligned);
} else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
llvm::PointerType::getUnqual(PtrTy));
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
ResultTy->getNumElements());
Function *ELd = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_expandload,
ResultTy);
Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
} else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
Type *PtrTy = ResultTy->getElementType();
// Cast the pointer to element type.
Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
llvm::PointerType::getUnqual(PtrTy));
Value *MaskVec =
getX86MaskVec(Builder, CI->getArgOperand(2),
cast<FixedVectorType>(ResultTy)->getNumElements());
Function *CSt = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_compressstore,
ResultTy);
Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
} else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
Name.startswith("avx512.mask.expand."))) {
auto *ResultTy = cast<FixedVectorType>(CI->getType());
Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
ResultTy->getNumElements());
bool IsCompress = Name[12] == 'c';
Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
: Intrinsic::x86_avx512_mask_expand;
Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
MaskVec });
} else if (IsX86 && Name.startswith("xop.vpcom")) {
bool IsSigned;
if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
Name.endswith("uq"))
IsSigned = false;
else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
Name.endswith("q"))
IsSigned = true;
else
llvm_unreachable("Unknown suffix");
unsigned Imm;
if (CI->arg_size() == 3) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
if (Name.startswith("lt"))
Imm = 0;
else if (Name.startswith("le"))
Imm = 1;
else if (Name.startswith("gt"))
Imm = 2;
else if (Name.startswith("ge"))
Imm = 3;
else if (Name.startswith("eq"))
Imm = 4;
else if (Name.startswith("ne"))
Imm = 5;
else if (Name.startswith("false"))
Imm = 6;
else if (Name.startswith("true"))
Imm = 7;
else
llvm_unreachable("Unknown condition");
}
Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
} else if (IsX86 && Name.startswith("xop.vpcmov")) {
Value *Sel = CI->getArgOperand(2);
Value *NotSel = Builder.CreateNot(Sel);
Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
Rep = Builder.CreateOr(Sel0, Sel1);
} else if (IsX86 && (Name.startswith("xop.vprot") ||
Name.startswith("avx512.prol") ||
Name.startswith("avx512.mask.prol"))) {
Rep = upgradeX86Rotate(Builder, *CI, false);
} else if (IsX86 && (Name.startswith("avx512.pror") ||
Name.startswith("avx512.mask.pror"))) {
Rep = upgradeX86Rotate(Builder, *CI, true);
} else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
Name.startswith("avx512.mask.vpshld") ||
Name.startswith("avx512.maskz.vpshld"))) {
bool ZeroMask = Name[11] == 'z';
Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
} else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
Name.startswith("avx512.mask.vpshrd") ||
Name.startswith("avx512.maskz.vpshrd"))) {
bool ZeroMask = Name[11] == 'z';
Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
} else if (IsX86 && Name == "sse42.crc32.64.8") {
Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_sse42_crc32_32_8);
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
} else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
Name.startswith("avx512.vbroadcast.s"))) {
// Replace broadcasts with a series of insertelements.
auto *VecTy = cast<FixedVectorType>(CI->getType());
Type *EltTy = VecTy->getElementType();
unsigned EltNum = VecTy->getNumElements();
Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
EltTy->getPointerTo());
Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
Rep = PoisonValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
Name.startswith("avx2.pmovzx") ||
Name.startswith("avx512.mask.pmovsx") ||
Name.startswith("avx512.mask.pmovzx"))) {
auto *DstTy = cast<FixedVectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
// Extract a subvector of the first NumDstElts lanes and sign/zero extend.
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i;
Value *SV =
Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
// If there are 3 arguments, it's a masked intrinsic so we need a select.
if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (Name == "avx512.mask.pmov.qd.256" ||
Name == "avx512.mask.pmov.qd.512" ||
Name == "avx512.mask.pmov.wb.256" ||
Name == "avx512.mask.pmov.wb.512") {
Type *Ty = CI->getArgOperand(1)->getType();
Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128")) {
// Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateAlignedLoad(VT, Op, Align(1));
if (NumSrcElts == 2)
Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
else
Rep = Builder.CreateShuffleVector(
Load, ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
} else if (IsX86 && (Name.startswith("avx512.mask.shuf.i") ||
Name.startswith("avx512.mask.shuf.f"))) {
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Type *VT = CI->getType();
unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
unsigned ControlBitsMask = NumLanes - 1;
unsigned NumControlBits = NumLanes / 2;
SmallVector<int, 8> ShuffleMask(0);
for (unsigned l = 0; l != NumLanes; ++l) {
unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
// We actually need the other source.
if (l >= NumLanes / 2)
LaneMask += NumLanes;
for (unsigned i = 0; i != NumElementsInLane; ++i)
ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
}
Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
CI->getArgOperand(1), ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.broadcastf") ||
Name.startswith("avx512.mask.broadcasti"))) {
unsigned NumSrcElts =
cast<FixedVectorType>(CI->getArgOperand(0)->getType())
->getNumElements();
unsigned NumDstElts =
cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 8> ShuffleMask(NumDstElts);
for (unsigned i = 0; i != NumDstElts; ++i)
ShuffleMask[i] = i % NumSrcElts;
Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
CI->getArgOperand(0),
ShuffleMask);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
Name.startswith("avx512.mask.broadcast.s"))) {
// Replace vp?broadcasts with a vector shuffle.
Value *Op = CI->getArgOperand(0);
ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
SmallVector<int, 8> M;
ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
Rep = Builder.CreateShuffleVector(Op, M);
if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
Name.startswith("avx2.padds.") ||
Name.startswith("avx512.padds.") ||
Name.startswith("avx512.mask.padds."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
} else if (IsX86 && (Name.startswith("sse2.psubs.") ||
Name.startswith("avx2.psubs.") ||
Name.startswith("avx512.psubs.") ||
Name.startswith("avx512.mask.psubs."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
Name.startswith("avx2.paddus.") ||
Name.startswith("avx512.mask.paddus."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
} else if (IsX86 && (Name.startswith("sse2.psubus.") ||
Name.startswith("avx2.psubus.") ||
Name.startswith("avx512.mask.psubus."))) {
Rep = UpgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
false);
} else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),
CI->getArgOperand(2),
CI->getArgOperand(3),
CI->getArgOperand(4),
true);
} else if (IsX86 && (Name == "sse2.psll.dq" ||
Name == "avx2.psll.dq")) {
// 128/256-bit shift left specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psrl.dq" ||
Name == "avx2.psrl.dq")) {
// 128/256-bit shift right specified in bits.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
Shift / 8); // Shift is in bits.
} else if (IsX86 && (Name == "sse2.psll.dq.bs" ||
Name == "avx2.psll.dq.bs" ||
Name == "avx512.psll.dq.512")) {
// 128/256/512-bit shift left specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse2.psrl.dq.bs" ||
Name == "avx2.psrl.dq.bs" ||
Name == "avx512.psrl.dq.512")) {
// 128/256/512-bit shift right specified in bytes.
unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
Rep = UpgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
} else if (IsX86 && (Name == "sse41.pblendw" ||
Name.startswith("sse41.blendp") ||
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast <ConstantInt>(CI->getArgOperand(2))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i;
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
} else if (IsX86 && (Name.startswith("avx.vinsertf128.") ||
Name == "avx2.vinserti128" ||
Name.startswith("avx512.mask.insert"))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned DstNumElts =
cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned SrcNumElts =
cast<FixedVectorType>(Op1->getType())->getNumElements();
unsigned Scale = DstNumElts / SrcNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Extend the second operand into a vector the size of the destination.
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i] = i;
for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
Idxs[i] = SrcNumElts;
Rep = Builder.CreateShuffleVector(Op1, Idxs);
// Insert the second operand into the first operand.
// Note that there is no guarantee that instruction lowering will actually
// produce a vinsertf128 instruction for the created shuffles. In
// particular, the 0 immediate case involves no lane changes, so it can
// be handled as a blend.
// Example of shuffle mask for 32-bit elements:
// Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
// Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
// First fill with the identity mask.
for (unsigned i = 0; i != DstNumElts; ++i)
Idxs[i] = i;
// Then replace the elements where we need to insert.
for (unsigned i = 0; i != SrcNumElts; ++i)
Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->arg_size() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
Name == "avx2.vextracti128" ||
Name.startswith("avx512.mask.vextract"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned DstNumElts =
cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned SrcNumElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
unsigned Scale = SrcNumElts / DstNumElts;
// Mask off the high bits of the immediate value; hardware ignores those.
Imm = Imm % Scale;
// Get indexes for the subvector of the input vector.
SmallVector<int, 8> Idxs(DstNumElts);
for (unsigned i = 0; i != DstNumElts; ++i) {
Idxs[i] = i + (Imm * DstNumElts);
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
// If the intrinsic has a mask operand, handle that.
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
Rep = nullptr;
} else if (IsX86 && (Name.startswith("avx512.mask.perm.df.") ||
Name.startswith("avx512.mask.perm.di."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
SmallVector<int, 8> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
Name == "avx2.vperm2i128")) {
// The immediate permute control byte looks like this:
// [1:0] - select 128 bits from sources for low half of destination
// [2] - ignore
// [3] - zero low half of destination
// [5:4] - select 128 bits from sources for high half of destination
// [6] - ignore
// [7] - zero high half of destination
uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned HalfSize = NumElts / 2;
SmallVector<int, 8> ShuffleMask(NumElts);
// Determine which operand(s) are actually in use for this instruction.
Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
// If needed, replace operands based on zero mask.
V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
// Permute low half of result.
unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
ShuffleMask[i] = StartIndex + i;
// Permute high half of result.
StartIndex = (Imm & 0x10) ? HalfSize : 0;
for (unsigned i = 0; i < HalfSize; ++i)
ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
} else if (IsX86 && (Name.startswith("avx.vpermil.") ||
Name == "sse2.pshuf.d" ||
Name.startswith("avx512.mask.vpermil.p") ||
Name.startswith("avx512.mask.pshuf.d."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
auto *VecTy = cast<FixedVectorType>(CI->getType());
unsigned NumElts = VecTy->getNumElements();
// Calculate the size of each index in the immediate.
unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
unsigned IdxMask = ((1 << IdxSize) - 1);
SmallVector<int, 8> Idxs(NumElts);
// Look up the bits for this element, wrapping around the immediate every
// 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
// by the first index of each group.
for (unsigned i = 0; i != NumElts; ++i)
Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
Name.startswith("avx512.mask.pshufl.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
for (unsigned i = 4; i != 8; ++i)
Idxs[i + l] = i + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
Name.startswith("avx512.mask.pshufh.w."))) {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += 8) {
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l] = i + l;
for (unsigned i = 0; i != 4; ++i)
Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned HalfLaneElts = NumLaneElts / 2;
SmallVector<int, 16> Idxs(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// Base index is the starting element of the lane.
Idxs[i] = i - (i % NumLaneElts);
// If we are halfway through the lane, switch to the other source.
if ((i % NumLaneElts) >= HalfLaneElts)
Idxs[i] += NumElts;
// Now select the specific element by adding HalfLaneElts bits from the
// immediate, wrapping around the immediate every 8 bits.
Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
}
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
Value *Op0 = CI->getArgOperand(0);
unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
unsigned Offset = 0;
if (Name.startswith("avx512.mask.movshdup."))
Offset = 1;
SmallVector<int, 16> Idxs(NumElts);
for (unsigned l = 0; l != NumElts; l += NumLaneElts)
for (unsigned i = 0; i != NumLaneElts; i += 2) {
Idxs[i + l + 0] = i + l + Offset;
Idxs[i + l + 1] = i + l + Offset;
}
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckl") ||
Name.startswith("avx512.mask.unpckl."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.punpckh") ||
Name.startswith("avx512.mask.unpckh."))) {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
int NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
SmallVector<int, 64> Idxs(NumElts);
for (int l = 0; l != NumElts; l += NumLaneElts)
for (int i = 0; i != NumLaneElts; ++i)
Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
Name.startswith("avx512.mask.pand."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
Name.startswith("avx512.mask.pandn."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
Rep = Builder.CreateAnd(Rep,
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
Name.startswith("avx512.mask.por."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
Name.startswith("avx512.mask.pxor."))) {
VectorType *FTy = cast<VectorType>(CI->getType());
VectorType *ITy = VectorType::getInteger(FTy);
Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
Builder.CreateBitCast(CI->getArgOperand(1), ITy));
Rep = Builder.CreateBitCast(Rep, FTy);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_add_ps_512;
else
IID = Intrinsic::x86_avx512_add_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_div_ps_512;
else
IID = Intrinsic::x86_avx512_div_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_mul_ps_512;
else
IID = Intrinsic::x86_avx512_mul_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
if (Name.endswith(".512")) {
Intrinsic::ID IID;
if (Name[17] == 's')
IID = Intrinsic::x86_avx512_sub_ps_512;
else
IID = Intrinsic::x86_avx512_sub_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
} else {
Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
}
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
Name.startswith("avx512.mask.min.p")) &&
Name.drop_front(18) == ".512") {
bool IsDouble = Name[17] == 'd';
bool IsMin = Name[13] == 'i';
static const Intrinsic::ID MinMaxTbl[2][2] = {
{ Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
{ Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
};
Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(4) });
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
Intrinsic::ctlz,
CI->getType()),
{ CI->getArgOperand(0), Builder.getInt1(false) });
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.psll")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
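// Extract the element size character ('d', 'q', 'w', 's', or 'h') that
// follows the variable-length "psll[i|v][N]" part of the name.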
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
IID = Intrinsic::x86_avx2_psllv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
IID = Intrinsic::x86_avx2_psllv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
IID = Intrinsic::x86_avx2_psllv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
IID = Intrinsic::x86_avx2_psllv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
IID = Intrinsic::x86_avx512_psllv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
IID = Intrinsic::x86_avx512_psllv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
IID = Intrinsic::x86_avx512_psllv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
: Intrinsic::x86_sse2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
: Intrinsic::x86_sse2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
: Intrinsic::x86_sse2_psll_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
: Intrinsic::x86_avx2_psll_d;
else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
: Intrinsic::x86_avx2_psll_q;
else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
: Intrinsic::x86_avx2_psll_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
Intrinsic::x86_avx512_psll_d_512;
else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
Intrinsic::x86_avx512_psll_q_512;
else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
: Intrinsic::x86_avx512_psll_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
IID = Intrinsic::x86_avx2_psrlv_q;
else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
IID = Intrinsic::x86_avx2_psrlv_q_256;
else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
IID = Intrinsic::x86_avx2_psrlv_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
IID = Intrinsic::x86_avx2_psrlv_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
IID = Intrinsic::x86_avx512_psrlv_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
IID = Intrinsic::x86_avx512_psrlv_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
IID = Intrinsic::x86_avx512_psrlv_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
: Intrinsic::x86_sse2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
: Intrinsic::x86_sse2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
: Intrinsic::x86_sse2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
: Intrinsic::x86_avx2_psrl_d;
else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
: Intrinsic::x86_avx2_psrl_q;
else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
: Intrinsic::x86_avx2_psrl_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
Intrinsic::x86_avx512_psrl_d_512;
else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
Intrinsic::x86_avx512_psrl_q_512;
else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
: Intrinsic::x86_avx512_psrl_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.psra")) {
bool IsImmediate = Name[16] == 'i' ||
(Name.size() > 18 && Name[18] == 'i');
bool IsVariable = Name[16] == 'v';
char Size = Name[16] == '.' ? Name[17] :
Name[17] == '.' ? Name[18] :
Name[18] == '.' ? Name[19] :
Name[20];
Intrinsic::ID IID;
if (IsVariable && Name[17] != '.') {
if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
IID = Intrinsic::x86_avx2_psrav_d;
else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
IID = Intrinsic::x86_avx2_psrav_d_256;
else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
IID = Intrinsic::x86_avx512_psrav_w_128;
else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
IID = Intrinsic::x86_avx512_psrav_w_256;
else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
IID = Intrinsic::x86_avx512_psrav_w_512;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".128")) {
if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
: Intrinsic::x86_sse2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
Intrinsic::x86_avx512_psra_q_128;
else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
: Intrinsic::x86_sse2_psra_w;
else
llvm_unreachable("Unexpected size");
} else if (Name.endswith(".256")) {
if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
: Intrinsic::x86_avx2_psra_d;
else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
Intrinsic::x86_avx512_psra_q_256;
else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
: Intrinsic::x86_avx2_psra_w;
else
llvm_unreachable("Unexpected size");
} else {
if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
Intrinsic::x86_avx512_psra_d_512;
else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
Intrinsic::x86_avx512_psra_q_512;
else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
: Intrinsic::x86_avx512_psra_w_512;
else
llvm_unreachable("Unexpected size");
}
Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
} else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
Rep = upgradeMaskedMove(Builder, *CI);
} else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.endswith(".movntdqa")) {
Module *M = F->getParent();
MDNode *Node = MDNode::get(
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
Value *Ptr = CI->getArgOperand(0);
// Convert the type of the pointer to a pointer to the stored type.
Value *BC = Builder.CreateBitCast(
Ptr, PointerType::getUnqual(CI->getType()), "cast");
LoadInst *LI = Builder.CreateAlignedLoad(
CI->getType(), BC,
Align(CI->getType()->getPrimitiveSizeInBits().getFixedSize() / 8));
LI->setMetadata(M->getMDKindID("nontemporal"), Node);
Rep = LI;
} else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
Name.startswith("fma.vfmsub.") ||
Name.startswith("fma.vfnmadd.") ||
Name.startswith("fma.vfnmsub."))) {
bool NegMul = Name[6] == 'n';
bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
if (IsScalar) {
Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
}
if (NegMul && !IsScalar)
Ops[0] = Builder.CreateFNeg(Ops[0]);
if (NegMul && IsScalar)
Ops[1] = Builder.CreateFNeg(Ops[1]);
if (NegAcc)
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
Ops[0]->getType()),
Ops);
if (IsScalar)
Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
(uint64_t)0);
} else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
Ops[0]->getType()),
Ops);
Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
Rep, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
Name.startswith("avx512.maskz.vfmadd.s") ||
Name.startswith("avx512.mask3.vfmadd.s") ||
Name.startswith("avx512.mask3.vfmsub.s") ||
Name.startswith("avx512.mask3.vfnmsub.s"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool NegMul = Name[2] == 'n';
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
Value *A = CI->getArgOperand(0);
Value *B = CI->getArgOperand(1);
Value *C = CI->getArgOperand(2);
if (NegMul && (IsMask3 || IsMaskZ))
A = Builder.CreateFNeg(A);
if (NegMul && !(IsMask3 || IsMaskZ))
B = Builder.CreateFNeg(B);
if (NegAcc)
C = Builder.CreateFNeg(C);
A = Builder.CreateExtractElement(A, (uint64_t)0);
B = Builder.CreateExtractElement(B, (uint64_t)0);
C = Builder.CreateExtractElement(C, (uint64_t)0);
if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
Intrinsic::ID IID;
if (Name.back() == 'd')
IID = Intrinsic::x86_avx512_vfmadd_f64;
else
IID = Intrinsic::x86_avx512_vfmadd_f32;
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
Rep = Builder.CreateCall(FMA, Ops);
} else {
Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
A->getType());
Rep = Builder.CreateCall(FMA, { A, B, C });
}
Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
IsMask3 ? C : A;
// For Mask3 with NegAcc, we need to create a new extractelement that
// avoids the negation above.
if (NegAcc && IsMask3)
PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
(uint64_t)0);
Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
Rep, PassThru);
Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
Rep, (uint64_t)0);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
Name.startswith("avx512.mask.vfnmadd.p") ||
Name.startswith("avx512.mask.vfnmsub.p") ||
Name.startswith("avx512.mask3.vfmadd.p") ||
Name.startswith("avx512.mask3.vfmsub.p") ||
Name.startswith("avx512.mask3.vfnmsub.p") ||
Name.startswith("avx512.maskz.vfmadd.p"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool NegMul = Name[2] == 'n';
bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
Value *A = CI->getArgOperand(0);
Value *B = CI->getArgOperand(1);
Value *C = CI->getArgOperand(2);
if (NegMul && (IsMask3 || IsMaskZ))
A = Builder.CreateFNeg(A);
if (NegMul && !(IsMask3 || IsMaskZ))
B = Builder.CreateFNeg(B);
if (NegAcc)
C = Builder.CreateFNeg(C);
if (CI->arg_size() == 5 &&
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
Intrinsic::ID IID;
// Check the character before ".512" in the string.
if (Name[Name.size()-5] == 's')
IID = Intrinsic::x86_avx512_vfmadd_ps_512;
else
IID = Intrinsic::x86_avx512_vfmadd_pd_512;
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
{ A, B, C, CI->getArgOperand(4) });
} else {
Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
Intrinsic::fma,
A->getType());
Rep = Builder.CreateCall(FMA, { A, B, C });
}
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
IsMask3 ? CI->getArgOperand(2) :
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && Name.startswith("fma.vfmsubadd.p")) {
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_fma_vfmaddsub_ps;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_fma_vfmaddsub_pd;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
else
llvm_unreachable("Unexpected intrinsic");
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
} else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
Name.startswith("avx512.mask3.vfmaddsub.p") ||
Name.startswith("avx512.maskz.vfmaddsub.p") ||
Name.startswith("avx512.mask3.vfmsubadd.p"))) {
bool IsMask3 = Name[11] == '3';
bool IsMaskZ = Name[11] == 'z';
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool IsSubAdd = Name[3] == 's';
if (CI->arg_size() == 5) {
Intrinsic::ID IID;
// Check the character before ".512" in the string.
if (Name[Name.size()-5] == 's')
IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
else
IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(4) };
if (IsSubAdd)
Ops[2] = Builder.CreateFNeg(Ops[2]);
Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
Ops);
} else {
int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
Ops[0]->getType());
Value *Odd = Builder.CreateCall(FMA, Ops);
Ops[2] = Builder.CreateFNeg(Ops[2]);
Value *Even = Builder.CreateCall(FMA, Ops);
if (IsSubAdd)
std::swap(Even, Odd);
SmallVector<int, 32> Idxs(NumElts);
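// Interleave the two FMA results: even result elements come from Even and
// odd elements from Odd.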
for (int i = 0; i != NumElts; ++i)
Idxs[i] = i + (i % 2) * NumElts;
Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
}
Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
IsMask3 ? CI->getArgOperand(2) :
CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
Name.startswith("avx512.maskz.pternlog."))) {
bool ZeroMask = Name[11] == 'z';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
unsigned EltWidth = CI->getType()->getScalarSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_128;
else if (VecWidth == 256 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_256;
else if (VecWidth == 512 && EltWidth == 32)
IID = Intrinsic::x86_avx512_pternlog_d_512;
else if (VecWidth == 128 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_128;
else if (VecWidth == 256 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_256;
else if (VecWidth == 512 && EltWidth == 64)
IID = Intrinsic::x86_avx512_pternlog_q_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(3) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
Name.startswith("avx512.maskz.vpmadd52"))) {
bool ZeroMask = Name[11] == 'z';
bool High = Name[20] == 'h' || Name[21] == 'h';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && !High)
IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
else if (VecWidth == 256 && !High)
IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
else if (VecWidth == 512 && !High)
IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
else if (VecWidth == 128 && High)
IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
else if (VecWidth == 256 && High)
IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
else if (VecWidth == 512 && High)
IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
CI->getArgOperand(2) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
Name.startswith("avx512.mask.vpermt2var.") ||
Name.startswith("avx512.maskz.vpermt2var."))) {
bool ZeroMask = Name[11] == 'z';
bool IndexForm = Name[17] == 'i';
Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
Name.startswith("avx512.maskz.vpdpbusd.") ||
Name.startswith("avx512.mask.vpdpbusds.") ||
Name.startswith("avx512.maskz.vpdpbusds."))) {
bool ZeroMask = Name[11] == 'z';
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusd_128;
else if (VecWidth == 256 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusd_256;
else if (VecWidth == 512 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusd_512;
else if (VecWidth == 128 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusds_128;
else if (VecWidth == 256 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusds_256;
else if (VecWidth == 512 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpbusds_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
Name.startswith("avx512.maskz.vpdpwssd.") ||
Name.startswith("avx512.mask.vpdpwssds.") ||
Name.startswith("avx512.maskz.vpdpwssds."))) {
bool ZeroMask = Name[11] == 'z';
bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
Intrinsic::ID IID;
if (VecWidth == 128 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssd_128;
else if (VecWidth == 256 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssd_256;
else if (VecWidth == 512 && !IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssd_512;
else if (VecWidth == 128 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssds_128;
else if (VecWidth == 256 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssds_256;
else if (VecWidth == 512 && IsSaturating)
IID = Intrinsic::x86_avx512_vpdpwssds_512;
else
llvm_unreachable("Unexpected intrinsic");
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2) };
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
: CI->getArgOperand(0);
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
} else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
Name == "addcarry.u32" || Name == "addcarry.u64" ||
Name == "subborrow.u32" || Name == "subborrow.u64")) {
Intrinsic::ID IID;
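// The operand width is encoded in the name's final character: '2' ends the
// u32 variants and '4' ends the u64 variants.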
if (Name[0] == 'a' && Name.back() == '2')
IID = Intrinsic::x86_addcarry_32;
else if (Name[0] == 'a' && Name.back() == '4')
IID = Intrinsic::x86_addcarry_64;
else if (Name[0] == 's' && Name.back() == '2')
IID = Intrinsic::x86_subborrow_32;
else if (Name[0] == 's' && Name.back() == '4')
IID = Intrinsic::x86_subborrow_64;
else
llvm_unreachable("Unexpected intrinsic");
// Make a call with 3 operands.
Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2)};
Value *NewCall = Builder.CreateCall(
Intrinsic::getDeclaration(CI->getModule(), IID),
Args);
// Extract the second result and store it.
Value *Data = Builder.CreateExtractValue(NewCall, 1);
// Cast the pointer to the right type.
Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
llvm::PointerType::getUnqual(Data->getType()));
Builder.CreateAlignedStore(Data, Ptr, Align(1));
// Replace the original call result with the first result of the new call.
Value *CF = Builder.CreateExtractValue(NewCall, 0);
CI->replaceAllUsesWith(CF);
Rep = nullptr;
} else if (IsX86 && Name.startswith("avx512.mask.") &&
upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
// Rep will be updated by the call in the condition.
} else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
Value *Arg = CI->getArgOperand(0);
Value *Neg = Builder.CreateNeg(Arg, "neg");
Value *Cmp = Builder.CreateICmpSGE(
Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
} else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
Name.startswith("atomic.load.add.f64.p"))) {
Value *Ptr = CI->getArgOperand(0);
Value *Val = CI->getArgOperand(1);
Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
AtomicOrdering::SequentiallyConsistent);
} else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
Name == "max.ui" || Name == "max.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
: Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
} else if (IsNVVM && (Name == "min.i" || Name == "min.ll" ||
Name == "min.ui" || Name == "min.ull")) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
: Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
} else if (IsNVVM && Name == "clz.ll") {
// llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Ctlz = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
{Arg->getType()}),
{Arg, Builder.getFalse()}, "ctlz");
Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
} else if (IsNVVM && Name == "popc.ll") {
// llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
Value *Arg = CI->getArgOperand(0);
Value *Popc = Builder.CreateCall(
Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
{Arg->getType()}),
Arg, "ctpop");
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
} else if (IsNVVM && Name == "h2f") {
Rep = Builder.CreateCall(Intrinsic::getDeclaration(
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
} else if (IsARM) {
Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
} else {
llvm_unreachable("Unknown function for CallBase upgrade.");
}
if (Rep)
CI->replaceAllUsesWith(Rep);
CI->eraseFromParent();
return;
}
const auto &DefaultCase = [&]() -> void {
if (CI->getFunctionType() == NewFn->getFunctionType()) {
// Handle generic mangling change.
assert(
(CI->getCalledFunction()->getName() != NewFn->getName()) &&
"Unknown function for CallBase upgrade and isn't just a name change");
CI->setCalledFunction(NewFn);
return;
}
// This must be an upgrade from a named to a literal struct.
auto *OldST = cast<StructType>(CI->getType());
assert(OldST != NewFn->getReturnType() && "Return type must have changed");
assert(OldST->getNumElements() ==
cast<StructType>(NewFn->getReturnType())->getNumElements() &&
"Must have same number of elements");
SmallVector<Value *> Args(CI->args());
Value *NewCI = Builder.CreateCall(NewFn, Args);
Value *Res = PoisonValue::get(OldST);
for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
Res = Builder.CreateInsertValue(Res, Elem, Idx);
}
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
return;
};
CallInst *NewCall = nullptr;
switch (NewFn->getIntrinsicID()) {
default: {
DefaultCase();
return;
}
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::arm_neon_bfdot:
case Intrinsic::arm_neon_bfmmla:
case Intrinsic::arm_neon_bfmlalb:
case Intrinsic::arm_neon_bfmlalt:
case Intrinsic::aarch64_neon_bfdot:
case Intrinsic::aarch64_neon_bfmmla:
case Intrinsic::aarch64_neon_bfmlalb:
case Intrinsic::aarch64_neon_bfmlalt: {
SmallVector<Value *, 3> Args;
assert(CI->arg_size() == 3 &&
"Mismatch between function args and call args");
size_t OperandWidth =
CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
auto Iter = CI->args().begin();
Args.push_back(*Iter++);
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::bitreverse:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::ctlz:
case Intrinsic::cttz:
assert(CI->arg_size() == 1 &&
"Mismatch between function args and call args");
NewCall =
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
break;
case Intrinsic::objectsize: {
Value *NullIsUnknownSize =
CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
Value *Dynamic =
CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
NewCall = Builder.CreateCall(
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
break;
}
case Intrinsic::ctpop:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::convert_from_fp16:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
case Intrinsic::dbg_value:
// Upgrade from the old version that had an extra offset argument.
assert(CI->arg_size() == 4);
// Drop nonzero offsets instead of attempting to upgrade them.
if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
if (Offset->isZeroValue()) {
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
break;
}
CI->eraseFromParent();
return;
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
if (CI->arg_size() != 4) {
DefaultCase();
return;
}
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
return;
case Intrinsic::var_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
assert(CI->arg_size() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
NewFn,
{CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
CI->getArgOperand(3), Constant::getNullValue(Builder.getInt8PtrTy())});
CI->eraseFromParent();
return;
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
break;
case Intrinsic::x86_xop_vpermil2pd:
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
SmallVector<Value *, 4> Args(CI->args());
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestnzc: {
// The arguments for these intrinsics used to be v4f32, and changed
// to v2i64. This is purely a nop, since those are bitwise intrinsics.
// So, the only thing required is a bitcast for both arguments.
// First, check the arguments have the old type.
Value *Arg0 = CI->getArgOperand(0);
if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
return;
// Old intrinsic, add bitcasts
Value *Arg1 = CI->getArgOperand(1);
auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
break;
}
case Intrinsic::x86_rdtscp: {
// This used to take 1 argument. If we have no arguments, it is already
// upgraded.
if (CI->getNumOperands() == 0)
return;
NewCall = Builder.CreateCall(NewFn);
// Extract the second result and store it.
Value *Data = Builder.CreateExtractValue(NewCall, 1);
// Cast the pointer to the right type.
Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
llvm::PointerType::getUnqual(Data->getType()));
Builder.CreateAlignedStore(Data, Ptr, Align(1));
// Replace the original call result with the first result of the new call.
Value *TSC = Builder.CreateExtractValue(NewCall, 0);
NewCall->takeName(CI);
CI->replaceAllUsesWith(TSC);
CI->eraseFromParent();
return;
}
case Intrinsic::x86_sse41_insertps:
case Intrinsic::x86_sse41_dppd:
case Intrinsic::x86_sse41_dpps:
case Intrinsic::x86_sse41_mpsadbw:
case Intrinsic::x86_avx_dp_ps_256:
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
SmallVector<Value *, 4> Args(CI->args());
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::x86_avx512_mask_cmp_pd_128:
case Intrinsic::x86_avx512_mask_cmp_pd_256:
case Intrinsic::x86_avx512_mask_cmp_pd_512:
case Intrinsic::x86_avx512_mask_cmp_ps_128:
case Intrinsic::x86_avx512_mask_cmp_ps_256:
case Intrinsic::x86_avx512_mask_cmp_ps_512: {
SmallVector<Value *, 4> Args(CI->args());
unsigned NumElts =
cast<FixedVectorType>(Args[0]->getType())->getNumElements();
Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
NewCall = Builder.CreateCall(NewFn, Args);
Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
NewCall->takeName(CI);
CI->replaceAllUsesWith(Res);
CI->eraseFromParent();
return;
}
case Intrinsic::thread_pointer: {
NewCall = Builder.CreateCall(NewFn, {});
break;
}
case Intrinsic::invariant_start:
case Intrinsic::invariant_end: {
SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
NewCall->copyMetadata(*CI);
break;
}
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset: {
// We have to make sure that the call signature is what we're expecting.
// We only want to change the old signatures by removing the alignment arg:
// @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
// -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
// @llvm.memset...(i8*, i8, i[32|64], i32, i1)
// -> @llvm.memset...(i8*, i8, i[32|64], i1)
// Note: i8*'s in the above can be any pointer type
if (CI->arg_size() != 5) {
DefaultCase();
return;
}
// Remove alignment argument (3), and add alignment attributes to the
// dest/src pointers.
Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), CI->getArgOperand(4)};
NewCall = Builder.CreateCall(NewFn, Args);
AttributeList OldAttrs = CI->getAttributes();
AttributeList NewAttrs = AttributeList::get(
C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
{OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
NewCall->setAttributes(NewAttrs);
auto *MemCI = cast<MemIntrinsic>(NewCall);
// All mem intrinsics support dest alignment.
const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
MemCI->setDestAlignment(Align->getMaybeAlignValue());
// Memcpy/Memmove also support source alignment.
if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
MTI->setSourceAlignment(Align->getMaybeAlignValue());
break;
}
}
assert(NewCall && "Should have either set this variable or returned through "
"the default case");
NewCall->takeName(CI);
CI->replaceAllUsesWith(NewCall);
CI->eraseFromParent();
}
void llvm::UpgradeCallsToIntrinsic(Function *F) {
assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
// Check if this function should be upgraded and get the replacement function
// if there is one.
Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
// Replace all users of the old function with the new function or new
// instructions. This is not a range loop because the call is deleted.
for (User *U : make_early_inc_range(F->users()))
if (CallBase *CB = dyn_cast<CallBase>(U))
UpgradeIntrinsicCall(CB, NewFn);
// Remove old function, no longer used, from the module.
F->eraseFromParent();
}
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
// Check if the tag uses struct-path aware TBAA format.
if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
return &MD;
auto &Context = MD.getContext();
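// This is an old-style scalar tag ({name, parent[, constness]}); wrap it
// into the struct-path access-tag form {base, access type, offset 0[, constness]}.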
if (MD.getNumOperands() == 3) {
Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
Metadata *Elts2[] = {ScalarType, ScalarType,
ConstantAsMetadata::get(
Constant::getNullValue(Type::getInt64Ty(Context))),
MD.getOperand(2)};
return MDNode::get(Context, Elts2);
}
// Create a MDNode <MD, MD, offset 0>
Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
Type::getInt64Ty(Context)))};
return MDNode::get(Context, Elts);
}
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
Instruction *&Temp) {
if (Opc != Instruction::BitCast)
return nullptr;
Temp = nullptr;
Type *SrcTy = V->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = V->getContext();
// We have no information about target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
}
return nullptr;
}
Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
if (Opc != Instruction::BitCast)
return nullptr;
Type *SrcTy = C->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
LLVMContext &Context = C->getContext();
// We have no information about target data layout, so we assume that
// the maximum pointer size is 64 bits.
Type *MidTy = Type::getInt64Ty(Context);
return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
DestTy);
}
return nullptr;
}
/// Check the debug info version number; if it is outdated, drop the debug
/// info. Return true if the module was modified.
bool llvm::UpgradeDebugInfo(Module &M) {
unsigned Version = getDebugMetadataVersionFromModule(M);
if (Version == DEBUG_METADATA_VERSION) {
bool BrokenDebugInfo = false;
if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
report_fatal_error("Broken module found, compilation aborted!");
if (!BrokenDebugInfo)
// Everything is ok.
return false;
else {
// Diagnose malformed debug info.
DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
M.getContext().diagnose(Diag);
}
}
bool Modified = StripDebugInfo(M);
if (Modified && Version != DEBUG_METADATA_VERSION) {
// Diagnose a version mismatch.
DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
M.getContext().diagnose(DiagVersion);
}
return Modified;
}
/// This checks for the objc retain/release marker which should be upgraded. It
/// returns true if the module was modified.
static bool UpgradeRetainReleaseMarker(Module &M) {
bool Changed = false;
const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
if (ModRetainReleaseMarker) {
MDNode *Op = ModRetainReleaseMarker->getOperand(0);
if (Op) {
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
if (ID) {
SmallVector<StringRef, 4> ValueComp;
ID->getString().split(ValueComp, "#");
if (ValueComp.size() == 2) {
std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
ID = MDString::get(M.getContext(), NewValue);
}
M.addModuleFlag(Module::Error, MarkerKey, ID);
M.eraseNamedMetadata(ModRetainReleaseMarker);
Changed = true;
}
}
}
return Changed;
}
void llvm::UpgradeARCRuntime(Module &M) {
// This lambda converts plain calls to ARC runtime functions into calls to
// the corresponding intrinsics.
auto UpgradeToIntrinsic = [&](const char *OldFunc,
llvm::Intrinsic::ID IntrinsicFunc) {
Function *Fn = M.getFunction(OldFunc);
if (!Fn)
return;
Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
for (User *U : make_early_inc_range(Fn->users())) {
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI || CI->getCalledFunction() != Fn)
continue;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
FunctionType *NewFuncTy = NewFn->getFunctionType();
SmallVector<Value *, 2> Args;
// Don't upgrade the intrinsic if it's not valid to bitcast the return
// value to the return type of the old function.
if (NewFuncTy->getReturnType() != CI->getType() &&
!CastInst::castIsValid(Instruction::BitCast, CI,
NewFuncTy->getReturnType()))
continue;
bool InvalidCast = false;
for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
Value *Arg = CI->getArgOperand(I);
// Bitcast argument to the parameter type of the new function if it's
// not a variadic argument.
if (I < NewFuncTy->getNumParams()) {
// Don't upgrade the intrinsic if it's not valid to bitcast the argument
// to the parameter type of the new function.
if (!CastInst::castIsValid(Instruction::BitCast, Arg,
NewFuncTy->getParamType(I))) {
InvalidCast = true;
break;
}
Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
}
Args.push_back(Arg);
}
if (InvalidCast)
continue;
// Create a call instruction that calls the new function.
CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
NewCall->takeName(CI);
// Bitcast the return value back to the type of the old call.
Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
if (!CI->use_empty())
CI->replaceAllUsesWith(NewRetVal);
CI->eraseFromParent();
}
if (Fn->use_empty())
Fn->eraseFromParent();
};
// Unconditionally convert a call to "clang.arc.use" to a call to
// "llvm.objc.clang.arc.use".
UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
// Upgrade the retain/release marker. If there is no need to upgrade
// the marker, that means either the module is already new enough to contain
// the new intrinsics or it is not ARC, so there is no need to upgrade the runtime calls.
if (!UpgradeRetainReleaseMarker(M))
return;
std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
{"objc_autorelease", llvm::Intrinsic::objc_autorelease},
{"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
{"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
{"objc_autoreleaseReturnValue",
llvm::Intrinsic::objc_autoreleaseReturnValue},
{"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
{"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
{"objc_initWeak", llvm::Intrinsic::objc_initWeak},
{"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
{"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
{"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
{"objc_release", llvm::Intrinsic::objc_release},
{"objc_retain", llvm::Intrinsic::objc_retain},
{"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
{"objc_retainAutoreleaseReturnValue",
llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
{"objc_retainAutoreleasedReturnValue",
llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
{"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
{"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
{"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
{"objc_unsafeClaimAutoreleasedReturnValue",
llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
{"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
{"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
{"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
{"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
{"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
{"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
{"objc_arc_annotation_topdown_bbstart",
llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
{"objc_arc_annotation_topdown_bbend",
llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
{"objc_arc_annotation_bottomup_bbstart",
llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
{"objc_arc_annotation_bottomup_bbend",
llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
for (auto &I : RuntimeFuncs)
UpgradeToIntrinsic(I.first, I.second);
}
bool llvm::UpgradeModuleFlags(Module &M) {
NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
if (!ModFlags)
return false;
bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
bool HasSwiftVersionFlag = false;
uint8_t SwiftMajorVersion, SwiftMinorVersion;
uint32_t SwiftABIVersion;
auto Int8Ty = Type::getInt8Ty(M.getContext());
auto Int32Ty = Type::getInt32Ty(M.getContext());
for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
MDNode *Op = ModFlags->getOperand(I);
if (Op->getNumOperands() != 3)
continue;
MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
if (!ID)
continue;
if (ID->getString() == "Objective-C Image Info Version")
HasObjCFlag = true;
if (ID->getString() == "Objective-C Class Properties")
HasClassProperties = true;
// Upgrade PIC/PIE Module Flags. The module flag behavior for these two
// fields was Error and is now Max.
if (ID->getString() == "PIC Level" || ID->getString() == "PIE Level") {
if (auto *Behavior =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
if (Behavior->getLimitedValue() == Module::Error) {
Type *Int32Ty = Type::getInt32Ty(M.getContext());
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Max)),
MDString::get(M.getContext(), ID->getString()),
Op->getOperand(2)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// Upgrade branch protection and return address signing module flags. The
// module flag behavior for these fields was Error and is now Min.
if (ID->getString() == "branch-target-enforcement" ||
ID->getString().startswith("sign-return-address")) {
if (auto *Behavior =
mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
if (Behavior->getLimitedValue() == Module::Error) {
Type *Int32Ty = Type::getInt32Ty(M.getContext());
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
Op->getOperand(1), Op->getOperand(2)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// Upgrade the Objective-C Image Info Section. Remove the whitespace in the
// section name so that llvm-lto will not complain about mismatching
// module flags that are functionally the same.
if (ID->getString() == "Objective-C Image Info Section") {
if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
SmallVector<StringRef, 4> ValueComp;
Value->getString().split(ValueComp, " ");
if (ValueComp.size() != 1) {
std::string NewValue;
for (auto &S : ValueComp)
NewValue += S.str();
Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
MDString::get(M.getContext(), NewValue)};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// IRUpgrader turns an i32-typed "Objective-C Garbage Collection" flag into an i8 value.
// If the higher bits are set, it adds a new module flag for the Swift info.
if (ID->getString() == "Objective-C Garbage Collection") {
auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
if (Md) {
assert(Md->getValue() && "Expected non-empty metadata");
auto Type = Md->getValue()->getType();
if (Type == Int8Ty)
continue;
unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
if ((Val & 0xff) != Val) {
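// The packed layout is: bits 31-24 Swift major version, bits 23-16 Swift
// minor version, bits 15-8 Swift ABI version, bits 7-0 the GC value itself.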
HasSwiftVersionFlag = true;
SwiftABIVersion = (Val & 0xff00) >> 8;
SwiftMajorVersion = (Val & 0xff000000) >> 24;
SwiftMinorVersion = (Val & 0xff0000) >> 16;
}
Metadata *Ops[3] = {
ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
Op->getOperand(1),
ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
Changed = true;
}
}
}
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
// flag of value 0, so we can correclty downgrade this flag when trying to
// link an ObjC bitcode without this module flag with an ObjC bitcode with
// this module flag.
if (HasObjCFlag && !HasClassProperties) {
M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
Changed = true;
}
if (HasSwiftVersionFlag) {
M.addModuleFlag(Module::Error, "Swift ABI Version",
SwiftABIVersion);
M.addModuleFlag(Module::Error, "Swift Major Version",
ConstantInt::get(Int8Ty, SwiftMajorVersion));
M.addModuleFlag(Module::Error, "Swift Minor Version",
ConstantInt::get(Int8Ty, SwiftMinorVersion));
Changed = true;
}
return Changed;
}
void llvm::UpgradeSectionAttributes(Module &M) {
auto TrimSpaces = [](StringRef Section) -> std::string {
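// Rejoin the comma-separated components with no surrounding whitespace,
// e.g. "__DATA, __objc_catlist" becomes "__DATA,__objc_catlist".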
SmallVector<StringRef, 5> Components;
Section.split(Components, ',');
SmallString<32> Buffer;
raw_svector_ostream OS(Buffer);
for (auto Component : Components)
OS << ',' << Component.trim();
return std::string(OS.str().substr(1));
};
for (auto &GV : M.globals()) {
if (!GV.hasSection())
continue;
StringRef Section = GV.getSection();
if (!Section.startswith("__DATA, __objc_catlist"))
continue;
// __DATA, __objc_catlist, regular, no_dead_strip
// __DATA,__objc_catlist,regular,no_dead_strip
GV.setSection(TrimSpaces(Section));
}
}
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
StrictFPUpgradeVisitor() = default;
void visitCallBase(CallBase &Call) {
if (!Call.isStrictFP())
return;
if (isa<ConstrainedFPIntrinsic>(&Call))
return;
// If we get here, the caller doesn't have the strictfp attribute
// but this callsite does. Replace the strictfp attribute with nobuiltin.
Call.removeFnAttr(Attribute::StrictFP);
Call.addFnAttr(Attribute::NoBuiltin);
}
};
} // namespace
void llvm::UpgradeFunctionAttributes(Function &F) {
// If a function definition doesn't have the strictfp attribute,
// convert any callsite strictfp attributes to nobuiltin.
if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
StrictFPUpgradeVisitor SFPV;
SFPV.visit(F);
}
// Remove all incompatible attributes from the function.
F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
for (auto &Arg : F.args())
Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}
static bool isOldLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return false;
if (T->getNumOperands() < 1)
return false;
auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!S)
return false;
return S->getString().startswith("llvm.vectorizer.");
}
static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
StringRef OldPrefix = "llvm.vectorizer.";
assert(OldTag.startswith(OldPrefix) && "Expected old prefix");
if (OldTag == "llvm.vectorizer.unroll")
return MDString::get(C, "llvm.loop.interleave.count");
return MDString::get(
C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
.str());
}
static Metadata *upgradeLoopArgument(Metadata *MD) {
auto *T = dyn_cast_or_null<MDTuple>(MD);
if (!T)
return MD;
if (T->getNumOperands() < 1)
return MD;
auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
if (!OldTag)
return MD;
if (!OldTag->getString().startswith("llvm.vectorizer."))
return MD;
// This has an old tag. Upgrade it.
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
Ops.push_back(T->getOperand(I));
return MDTuple::get(T->getContext(), Ops);
}
MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
auto *T = dyn_cast<MDTuple>(&N);
if (!T)
return &N;
if (none_of(T->operands(), isOldLoopArgument))
return &N;
SmallVector<Metadata *, 8> Ops;
Ops.reserve(T->getNumOperands());
for (Metadata *MD : T->operands())
Ops.push_back(upgradeLoopArgument(MD));
return MDTuple::get(T->getContext(), Ops);
}
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
// For AMDGPU we upgrade older DataLayouts to include the default globals
// address space of 1.
if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
std::string Res = DL.str();
if (!T.isX86())
return Res;
// If the datalayout matches the expected format, add pointer size address
// spaces to the datalayout.
std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
if (!DL.contains(AddrSpaces)) {
SmallVector<StringRef, 4> Groups;
Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
if (R.match(DL, &Groups))
Res = (Groups[1] + AddrSpaces + Groups[3]).str();
}
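// As a sketch of the rewrite above (layout string abbreviated): given
// "e-m:e-i64:64-f80:128-n8:16:32:64-S128", the regex captures "e-m:e" in
// Groups[1] and the result becomes
// "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128".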
// For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
// Raising the alignment is safe because Clang did not produce f80 values in
// the MSVC environment before this upgrade was added.
if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
StringRef Ref = Res;
auto I = Ref.find("-f80:32-");
if (I != StringRef::npos)
Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
}
return Res;
}
void llvm::UpgradeAttributes(AttrBuilder &B) {
StringRef FramePointer;
Attribute A = B.getAttribute("no-frame-pointer-elim");
if (A.isValid()) {
// The value can be "true" or "false".
FramePointer = A.getValueAsString() == "true" ? "all" : "none";
B.removeAttribute("no-frame-pointer-elim");
}
if (B.contains("no-frame-pointer-elim-non-leaf")) {
// The value is ignored. "no-frame-pointer-elim"="true" takes priority.
if (FramePointer != "all")
FramePointer = "non-leaf";
B.removeAttribute("no-frame-pointer-elim-non-leaf");
}
if (!FramePointer.empty())
B.addAttribute("frame-pointer", FramePointer);
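// Illustrative mapping of the legacy frame-pointer attributes handled above:
//   "no-frame-pointer-elim"="true"            -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"           -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" (alone)  -> "frame-pointer"="non-leaf"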
A = B.getAttribute("null-pointer-is-valid");
if (A.isValid()) {
// The value can be "true" or "false".
bool NullPointerIsValid = A.getValueAsString() == "true";
B.removeAttribute("null-pointer-is-valid");
if (NullPointerIsValid)
B.addAttribute(Attribute::NullPointerIsValid);
}
}
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
// clang.arc.attachedcall bundles are now required to have an operand.
// If they don't, it's okay to drop them entirely: when there is an operand,
// the "attachedcall" is meaningful and required, but without an operand,
// it's just a marker NOP. Dropping it merely prevents an optimization.
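// For instance (illustrative IR), a bundle such as
//   [ "clang.arc.attachedcall"(i8* (i8*)* @objc_retainAutoreleasedReturnValue) ]
// is kept, while a bare [ "clang.arc.attachedcall"() ] bundle is removed.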
erase_if(Bundles, [&](OperandBundleDef &OBD) {
return OBD.getTag() == "clang.arc.attachedcall" &&
OBD.inputs().empty();
});
}
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index bf145bffe8bf..23ac012b9e00 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -1,663 +1,659 @@
//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines some helpful functions for dealing with the possibility of
// Unix signals occurring while your program is running.
//
//===----------------------------------------------------------------------===//
//
// This file is extremely careful to only do signal-safe things while in a
// signal handler. In particular, memory allocation and acquiring a mutex
// while in a signal handler should never occur. ManagedStatic isn't usable from
// a signal handler for 2 reasons:
//
// 1. Creating a new one allocates.
// 2. The signal handler could fire while llvm_shutdown is being processed, in
// which case the ManagedStatic is in an unknown state because it could
// already have been destroyed, or be in the process of being destroyed.
//
// Modifying the behavior of the signal handlers (such as registering new ones)
// can acquire a mutex, but all this guarantees is that the signal handler
// behavior is only modified by one thread at a time. A signal handler can still
// fire while this occurs!
//
// Adding work to a signal handler requires lock-freedom (and we assume atomics
// are always lock-free) because the signal handler could fire while new work is
// being added.
//
//===----------------------------------------------------------------------===//
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
#ifdef HAVE_BACKTRACE
# include BACKTRACE_HEADER // For backtrace().
#endif
#if HAVE_SIGNAL_H
#include <signal.h>
#endif
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#if HAVE_DLFCN_H
#include <dlfcn.h>
#endif
#if HAVE_MACH_MACH_H
#include <mach/mach.h>
#endif
#if HAVE_LINK_H
#include <link.h>
#endif
#ifdef HAVE__UNWIND_BACKTRACE
// FIXME: We should be able to use <unwind.h> for any target that has an
// _Unwind_Backtrace function, but on FreeBSD the configure test passes
// despite the function not existing, and on Android, <unwind.h> conflicts
// with <link.h>.
#ifdef __GLIBC__
#include <unwind.h>
#else
#undef HAVE__UNWIND_BACKTRACE
#endif
#endif
using namespace llvm;
static void SignalHandler(int Sig); // defined below.
static void InfoSignalHandler(int Sig); // defined below.
using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
static std::atomic<SignalHandlerFunctionType> InterruptFunction =
ATOMIC_VAR_INIT(nullptr);
static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
ATOMIC_VAR_INIT(nullptr);
/// The function to call on SIGPIPE (one-time use only).
static std::atomic<SignalHandlerFunctionType> OneShotPipeSignalFunction =
ATOMIC_VAR_INIT(nullptr);
namespace {
/// Signal-safe removal of files.
/// Inserting and erasing from the list isn't signal-safe, but removal of files
/// themselves is signal-safe. Memory is freed when the head is freed; deletion
/// is therefore not signal-safe either.
class FileToRemoveList {
std::atomic<char *> Filename = ATOMIC_VAR_INIT(nullptr);
std::atomic<FileToRemoveList *> Next = ATOMIC_VAR_INIT(nullptr);
FileToRemoveList() = default;
// Not signal-safe.
FileToRemoveList(const std::string &str) : Filename(strdup(str.c_str())) {}
public:
// Not signal-safe.
~FileToRemoveList() {
if (FileToRemoveList *N = Next.exchange(nullptr))
delete N;
if (char *F = Filename.exchange(nullptr))
free(F);
}
// Not signal-safe.
static void insert(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Insert the new file at the end of the list.
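// Lock-free append sketch: try to CAS the new node into the first null
// Next pointer; on failure, OldHead holds the node we lost the race to,
// so step to its Next field, reset the expected value, and retry.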
FileToRemoveList *NewHead = new FileToRemoveList(Filename);
std::atomic<FileToRemoveList *> *InsertionPoint = &Head;
FileToRemoveList *OldHead = nullptr;
while (!InsertionPoint->compare_exchange_strong(OldHead, NewHead)) {
InsertionPoint = &OldHead->Next;
OldHead = nullptr;
}
}
// Not signal-safe.
static void erase(std::atomic<FileToRemoveList *> &Head,
const std::string &Filename) {
// Use a lock to avoid concurrent erase: the comparison would access
// free'd memory.
static ManagedStatic<sys::SmartMutex<true>> Lock;
sys::SmartScopedLock<true> Writer(*Lock);
for (FileToRemoveList *Current = Head.load(); Current;
Current = Current->Next.load()) {
if (char *OldFilename = Current->Filename.load()) {
if (OldFilename != Filename)
continue;
// Leave an empty filename.
OldFilename = Current->Filename.exchange(nullptr);
// The filename might have become null between the time we
// compared it and the time we exchanged it.
if (OldFilename)
free(OldFilename);
}
}
}
// Signal-safe.
static void removeAllFiles(std::atomic<FileToRemoveList *> &Head) {
// If cleanup were to occur while we're removing files, we'd have a bad time.
// Make sure we're OK by preventing cleanup from doing anything while we're
// removing files. If cleanup races with us and we win we'll have a leak,
// but we won't crash.
FileToRemoveList *OldHead = Head.exchange(nullptr);
for (FileToRemoveList *currentFile = OldHead; currentFile;
currentFile = currentFile->Next.load()) {
// If erasing were occurring while we're trying to remove files, we'd look
// at free'd data. Take away the path and put it back when done.
if (char *path = currentFile->Filename.exchange(nullptr)) {
// Get the status so we can determine if it's a file or directory. If we
// can't stat the file, ignore it.
struct stat buf;
if (stat(path, &buf) != 0)
continue;
// If this is not a regular file, ignore it. We want to prevent removal
// of special files like /dev/null, even if the compiler is being run
// with the super-user permissions.
if (!S_ISREG(buf.st_mode))
continue;
// Otherwise, remove the file. We ignore any errors here as there is
// nothing else we can do.
unlink(path);
// We're done removing the file, erasing can safely proceed.
currentFile->Filename.exchange(path);
}
}
// We're done removing files, cleanup can safely proceed.
Head.exchange(OldHead);
}
};
static std::atomic<FileToRemoveList *> FilesToRemove = ATOMIC_VAR_INIT(nullptr);
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
/// either clean something up or nothing at all, but we shouldn't crash!
struct FilesToRemoveCleanup {
// Not signal-safe.
~FilesToRemoveCleanup() {
FileToRemoveList *Head = FilesToRemove.exchange(nullptr);
if (Head)
delete Head;
}
};
} // namespace
static StringRef Argv0;
/// Signals that represent requested termination. There's no bug or failure, or
/// if there is, it's not our direct responsibility. For whatever reason, our
/// continued execution is no longer desirable.
static const int IntSigs[] = {
SIGHUP, SIGINT, SIGTERM, SIGUSR2
};
/// Signals that represent that we have a bug, and our prompt termination has
/// been ordered.
static const int KillSigs[] = {
SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV, SIGQUIT
#ifdef SIGSYS
, SIGSYS
#endif
#ifdef SIGXCPU
, SIGXCPU
#endif
#ifdef SIGXFSZ
, SIGXFSZ
#endif
#ifdef SIGEMT
, SIGEMT
#endif
};
/// Signals that represent requests for status.
static const int InfoSigs[] = {
SIGUSR1
#ifdef SIGINFO
, SIGINFO
#endif
};
static const size_t NumSigs =
array_lengthof(IntSigs) + array_lengthof(KillSigs) +
array_lengthof(InfoSigs) + 1 /* SIGPIPE */;
static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
static struct {
struct sigaction SA;
int SigNo;
} RegisteredSignalInfo[NumSigs];
#if defined(HAVE_SIGALTSTACK)
// Hold onto both the old and new alternate signal stack so that it's not
// reported as a leak. We don't make any attempt to remove our alt signal
// stack if we remove our signal handlers; that can't be done reliably if
// someone else is also trying to do the same thing.
static stack_t OldAltStack;
LLVM_ATTRIBUTE_USED static void *NewAltStackPointer;
static void CreateSigAltStack() {
const size_t AltStackSize = MINSIGSTKSZ + 64 * 1024;
// If we're executing on the alternate stack, or we already have an alternate
// signal stack that we're happy with, there's nothing for us to do. Don't
// reduce the size; some other part of the process might need a larger stack
// than we do.
if (sigaltstack(nullptr, &OldAltStack) != 0 ||
OldAltStack.ss_flags & SS_ONSTACK ||
(OldAltStack.ss_sp && OldAltStack.ss_size >= AltStackSize))
return;
stack_t AltStack = {};
AltStack.ss_sp = static_cast<char *>(safe_malloc(AltStackSize));
NewAltStackPointer = AltStack.ss_sp; // Save to avoid reporting a leak.
AltStack.ss_size = AltStackSize;
if (sigaltstack(&AltStack, &OldAltStack) != 0)
free(AltStack.ss_sp);
}
#else
static void CreateSigAltStack() {}
#endif
static void RegisterHandlers() { // Not signal-safe.
// The mutex prevents other threads from registering handlers while we're
// doing it. We also have to protect the handlers and their count because
// a signal handler could fire while we're registering handlers.
static ManagedStatic<sys::SmartMutex<true>> SignalHandlerRegistrationMutex;
sys::SmartScopedLock<true> Guard(*SignalHandlerRegistrationMutex);
// If the handlers are already registered, we're done.
if (NumRegisteredSignals.load() != 0)
return;
// Create an alternate stack for signal handling. This is necessary for us to
// be able to reliably handle signals due to stack overflow.
CreateSigAltStack();
enum class SignalKind { IsKill, IsInfo };
auto registerHandler = [&](int Signal, SignalKind Kind) {
unsigned Index = NumRegisteredSignals.load();
assert(Index < array_lengthof(RegisteredSignalInfo) &&
"Out of space for signal handlers!");
struct sigaction NewHandler;
switch (Kind) {
case SignalKind::IsKill:
NewHandler.sa_handler = SignalHandler;
NewHandler.sa_flags = SA_NODEFER | SA_RESETHAND | SA_ONSTACK;
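// SA_NODEFER leaves the signal unblocked inside its own handler,
// SA_RESETHAND restores the default disposition once the handler has
// fired, and SA_ONSTACK runs the handler on the alternate stack set up
// in CreateSigAltStack().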
break;
case SignalKind::IsInfo:
NewHandler.sa_handler = InfoSignalHandler;
NewHandler.sa_flags = SA_ONSTACK;
break;
}
sigemptyset(&NewHandler.sa_mask);
// Install the new handler, save the old one in RegisteredSignalInfo.
sigaction(Signal, &NewHandler, &RegisteredSignalInfo[Index].SA);
RegisteredSignalInfo[Index].SigNo = Signal;
++NumRegisteredSignals;
};
for (auto S : IntSigs)
registerHandler(S, SignalKind::IsKill);
for (auto S : KillSigs)
registerHandler(S, SignalKind::IsKill);
if (OneShotPipeSignalFunction)
registerHandler(SIGPIPE, SignalKind::IsKill);
for (auto S : InfoSigs)
registerHandler(S, SignalKind::IsInfo);
}
void sys::unregisterHandlers() {
// Restore all of the signal handlers to how they were before we showed up.
for (unsigned i = 0, e = NumRegisteredSignals.load(); i != e; ++i) {
sigaction(RegisteredSignalInfo[i].SigNo,
&RegisteredSignalInfo[i].SA, nullptr);
--NumRegisteredSignals;
}
}
/// Process the FilesToRemove list.
static void RemoveFilesToRemove() {
FileToRemoveList::removeAllFiles(FilesToRemove);
}
void sys::CleanupOnSignal(uintptr_t Context) {
int Sig = (int)Context;
if (llvm::is_contained(InfoSigs, Sig)) {
InfoSignalHandler(Sig);
return;
}
RemoveFilesToRemove();
if (llvm::is_contained(IntSigs, Sig) || Sig == SIGPIPE)
return;
llvm::sys::RunSignalHandlers();
}
// The signal handler that runs.
static void SignalHandler(int Sig) {
// Restore the signal behavior to default, so that the program actually
// crashes when we return and the signal reissues. This also ensures that if
// we crash in our signal handler, the program will terminate immediately
// instead of recursing in the signal handler.
sys::unregisterHandlers();
// Unmask all potentially blocked kill signals.
sigset_t SigMask;
sigfillset(&SigMask);
sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
{
RemoveFilesToRemove();
if (Sig == SIGPIPE)
if (auto OldOneShotPipeFunction =
OneShotPipeSignalFunction.exchange(nullptr))
return OldOneShotPipeFunction();
bool IsIntSig = llvm::is_contained(IntSigs, Sig);
if (IsIntSig)
if (auto OldInterruptFunction = InterruptFunction.exchange(nullptr))
return OldInterruptFunction();
if (Sig == SIGPIPE || IsIntSig) {
raise(Sig); // Execute the default handler.
return;
}
}
// Otherwise if it is a fault (like SEGV) run any handler.
llvm::sys::RunSignalHandlers();
#ifdef __s390__
// On S/390, certain signals are delivered with PSW Address pointing to
// *after* the faulting instruction. Simply returning from the signal
// handler would continue execution after that point, instead of
// re-raising the signal. Raise the signal manually in those cases.
if (Sig == SIGILL || Sig == SIGFPE || Sig == SIGTRAP)
raise(Sig);
#endif
}
static void InfoSignalHandler(int Sig) {
SaveAndRestore<int> SaveErrnoDuringASignalHandler(errno);
if (SignalHandlerFunctionType CurrentInfoFunction = InfoSignalFunction)
CurrentInfoFunction();
}
void llvm::sys::RunInterruptHandlers() {
RemoveFilesToRemove();
}
void llvm::sys::SetInterruptFunction(void (*IF)()) {
InterruptFunction.exchange(IF);
RegisterHandlers();
}
void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
InfoSignalFunction.exchange(Handler);
RegisterHandlers();
}
void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
OneShotPipeSignalFunction.exchange(Handler);
RegisterHandlers();
}
void llvm::sys::DefaultOneShotPipeSignalHandler() {
- // UNIX03 conformance requires a non-zero exit code and an error message
- // to stderr when writing to a closed stdout fails.
- errs() << "error: write on a pipe with no reader\n";
-
// Send a special return code that drivers can check for, from sysexits.h.
exit(EX_IOERR);
}
// The public API
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
// Ensure that cleanup will occur as soon as one file is added.
static ManagedStatic<FilesToRemoveCleanup> FilesToRemoveCleanup;
*FilesToRemoveCleanup;
FileToRemoveList::insert(FilesToRemove, Filename.str());
RegisterHandlers();
return false;
}
// The public API
void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
FileToRemoveList::erase(FilesToRemove, Filename.str());
}
/// Add a function to be called when a signal is delivered to the process. The
/// handler can have a cookie passed to it to identify what instance of the
/// handler it is.
void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr,
void *Cookie) { // Signal-safe.
insertSignalHandler(FnPtr, Cookie);
RegisterHandlers();
}
#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \
(defined(__linux__) || defined(__FreeBSD__) || \
defined(__FreeBSD_kernel__) || defined(__NetBSD__))
struct DlIteratePhdrData {
void **StackTrace;
int depth;
bool first;
const char **modules;
intptr_t *offsets;
const char *main_exec_name;
};
static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) {
DlIteratePhdrData *data = (DlIteratePhdrData*)arg;
const char *name = data->first ? data->main_exec_name : info->dlpi_name;
data->first = false;
for (int i = 0; i < info->dlpi_phnum; i++) {
const auto *phdr = &info->dlpi_phdr[i];
if (phdr->p_type != PT_LOAD)
continue;
intptr_t beg = info->dlpi_addr + phdr->p_vaddr;
intptr_t end = beg + phdr->p_memsz;
for (int j = 0; j < data->depth; j++) {
if (data->modules[j])
continue;
intptr_t addr = (intptr_t)data->StackTrace[j];
if (beg <= addr && addr < end) {
data->modules[j] = name;
data->offsets[j] = addr - info->dlpi_addr;
}
}
}
return 0;
}
/// If this is an ELF platform, we can find all loaded modules and their virtual
/// addresses with dl_iterate_phdr.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
const char *MainExecutableName,
StringSaver &StrPool) {
DlIteratePhdrData data = {StackTrace, Depth, true,
Modules, Offsets, MainExecutableName};
dl_iterate_phdr(dl_iterate_phdr_cb, &data);
return true;
}
#else
/// This platform does not have dl_iterate_phdr, so we do not yet know how to
/// find all loaded DSOs.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
const char *MainExecutableName,
StringSaver &StrPool) {
return false;
}
#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ...
#if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE)
static int unwindBacktrace(void **StackTrace, int MaxEntries) {
if (MaxEntries < 0)
return 0;
// Skip the first frame ('unwindBacktrace' itself).
int Entries = -1;
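// Starting at -1 means the first frame observed (this function itself) is
// never stored; slots are only written once Entries reaches zero.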
auto HandleFrame = [&](_Unwind_Context *Context) -> _Unwind_Reason_Code {
// Apparently we need to detect reaching the end of the stack ourselves.
void *IP = (void *)_Unwind_GetIP(Context);
if (!IP)
return _URC_END_OF_STACK;
assert(Entries < MaxEntries && "recursively called after END_OF_STACK?");
if (Entries >= 0)
StackTrace[Entries] = IP;
if (++Entries == MaxEntries)
return _URC_END_OF_STACK;
return _URC_NO_REASON;
};
_Unwind_Backtrace(
[](_Unwind_Context *Context, void *Handler) {
return (*static_cast<decltype(HandleFrame) *>(Handler))(Context);
},
static_cast<void *>(&HandleFrame));
return std::max(Entries, 0);
}
#endif
// In the case of a program crash or fault, print out a stack trace so that the
// user has an indication of why and where we died.
//
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
#if ENABLE_BACKTRACES
static void *StackTrace[256];
int depth = 0;
#if defined(HAVE_BACKTRACE)
// Use backtrace() to output a backtrace on Linux systems with glibc.
if (!depth)
depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace)));
#endif
#if defined(HAVE__UNWIND_BACKTRACE)
// Try _Unwind_Backtrace() if backtrace() failed.
if (!depth)
depth = unwindBacktrace(StackTrace,
static_cast<int>(array_lengthof(StackTrace)));
#endif
if (!depth)
return;
// If "Depth" is not provided by the caller, use the return value of
// backtrace() for printing a symbolized stack trace.
if (!Depth)
Depth = depth;
if (printSymbolizedStackTrace(Argv0, StackTrace, Depth, OS))
return;
OS << "Stack dump without symbol names (ensure you have llvm-symbolizer in "
"your PATH or set the environment var `LLVM_SYMBOLIZER_PATH` to point "
"to it):\n";
#if HAVE_DLFCN_H && HAVE_DLADDR
int width = 0;
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
const char* name = strrchr(dlinfo.dli_fname, '/');
int nwidth;
if (!name) nwidth = strlen(dlinfo.dli_fname);
else nwidth = strlen(name) - 1;
if (nwidth > width) width = nwidth;
}
for (int i = 0; i < depth; ++i) {
Dl_info dlinfo;
dladdr(StackTrace[i], &dlinfo);
OS << format("%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
if (!name) OS << format(" %-*s", width, dlinfo.dli_fname);
else OS << format(" %-*s", width, name+1);
OS << format(" %#0*lx", (int)(sizeof(void*) * 2) + 2,
(unsigned long)StackTrace[i]);
if (dlinfo.dli_sname != nullptr) {
OS << ' ';
int res;
char* d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res);
if (!d) OS << dlinfo.dli_sname;
else OS << d;
free(d);
OS << format(" + %tu", (static_cast<const char*>(StackTrace[i])-
static_cast<const char*>(dlinfo.dli_saddr)));
}
OS << '\n';
}
#elif defined(HAVE_BACKTRACE)
backtrace_symbols_fd(StackTrace, Depth, STDERR_FILENO);
#endif
#endif
}
static void PrintStackTraceSignalHandler(void *) {
sys::PrintStackTrace(llvm::errs());
}
void llvm::sys::DisableSystemDialogsOnCrash() {}
/// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the
/// process, print a stack trace and then exit.
void llvm::sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
bool DisableCrashReporting) {
::Argv0 = Argv0;
AddSignalHandler(PrintStackTraceSignalHandler, nullptr);
#if defined(__APPLE__) && ENABLE_CRASH_OVERRIDES
// Environment variable to disable any kind of crash dialog.
if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) {
mach_port_t self = mach_task_self();
exception_mask_t mask = EXC_MASK_CRASH;
kern_return_t ret = task_set_exception_ports(self,
mask,
MACH_PORT_NULL,
EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES,
THREAD_STATE_NONE);
(void)ret;
}
#endif
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c28216048d7c..06e21f90ebf1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,21960 +1,21959 @@
//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "AArch64ISelLowering.h"
#include "AArch64CallingConvention.h"
#include "AArch64ExpandImm.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <bitset>
#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "aarch64-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumShiftInserts, "Number of vector shift inserts");
STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
// FIXME: The necessary dtprel relocations don't seem to be supported
// well in the GNU bfd and gold linkers at the moment. Therefore, by
// default, for now, fall back to GeneralDynamic code generation.
cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
"aarch64-elf-ldtls-generation", cl::Hidden,
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
static cl::opt<bool>
EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
cl::desc("Enable AArch64 logical imm instruction "
"optimization"),
cl::init(true));
// Temporary option added for the purpose of testing functionality added
// to DAGCombiner.cpp in D92230. It is expected that this can be removed
// in the future once both implementations are based on MGATHER rather
// than the GLD1 nodes added for the SVE gather load intrinsics.
static cl::opt<bool>
EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
cl::desc("Combine extends of AArch64 masked "
"gather intrinsics"),
cl::init(true));
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for vector");
case MVT::i8:
return MVT::nxv16i8;
case MVT::i16:
return MVT::nxv8i16;
case MVT::i32:
return MVT::nxv4i32;
case MVT::i64:
return MVT::nxv2i64;
case MVT::f16:
return MVT::nxv8f16;
case MVT::f32:
return MVT::nxv4f32;
case MVT::f64:
return MVT::nxv2f64;
case MVT::bf16:
return MVT::nxv8bf16;
}
}
// NOTE: Currently there's only a need to return integer vector types. If this
// changes then just add an extra "type" parameter.
static inline EVT getPackedSVEVectorVT(ElementCount EC) {
switch (EC.getKnownMinValue()) {
default:
llvm_unreachable("unexpected element count for vector");
case 16:
return MVT::nxv16i8;
case 8:
return MVT::nxv8i16;
case 4:
return MVT::nxv4i32;
case 2:
return MVT::nxv2i64;
}
}
static inline EVT getPromotedVTForPredicate(EVT VT) {
assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
"Expected scalable predicate vector type!");
switch (VT.getVectorMinNumElements()) {
default:
llvm_unreachable("unexpected element count for vector");
case 2:
return MVT::nxv2i64;
case 4:
return MVT::nxv4i32;
case 8:
return MVT::nxv8i16;
case 16:
return MVT::nxv16i8;
}
}
/// Returns true if VT's elements occupy the lowest bit positions of its
/// associated register class without any intervening space.
///
/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
/// same register class, but only nxv8f16 can be treated as a packed vector.
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal vector type!");
return VT.isFixedLengthVector() ||
VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
// predicate and end with a passthru value matching the result type.
static bool isMergePassthruOpcode(unsigned Opc) {
switch (Opc) {
default:
return false;
case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::REVH_MERGE_PASSTHRU:
case AArch64ISD::REVW_MERGE_PASSTHRU:
case AArch64ISD::REVD_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
case AArch64ISD::ABS_MERGE_PASSTHRU:
case AArch64ISD::NEG_MERGE_PASSTHRU:
case AArch64ISD::FNEG_MERGE_PASSTHRU:
case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
case AArch64ISD::FCEIL_MERGE_PASSTHRU:
case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
case AArch64ISD::FRINT_MERGE_PASSTHRU:
case AArch64ISD::FROUND_MERGE_PASSTHRU:
case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
case AArch64ISD::FSQRT_MERGE_PASSTHRU:
case AArch64ISD::FRECPX_MERGE_PASSTHRU:
case AArch64ISD::FABS_MERGE_PASSTHRU:
return true;
}
}
// Returns true if inactive lanes are known to be zeroed by construction.
static bool isZeroingInactiveLanes(SDValue Op) {
switch (Op.getOpcode()) {
default:
// We guarantee that i1 splat_vectors zero the other lanes by
// implementing them with ptrue and possibly a punpklo for nxv1i1.
if (ISD::isConstantSplatVectorAllOnes(Op.getNode()))
return true;
return false;
case AArch64ISD::PTRUE:
case AArch64ISD::SETCC_MERGE_ZERO:
return true;
case ISD::INTRINSIC_WO_CHAIN:
switch (Op.getConstantOperandVal(0)) {
default:
return false;
case Intrinsic::aarch64_sve_ptrue:
case Intrinsic::aarch64_sve_pnext:
case Intrinsic::aarch64_sve_cmpeq:
case Intrinsic::aarch64_sve_cmpne:
case Intrinsic::aarch64_sve_cmpge:
case Intrinsic::aarch64_sve_cmpgt:
case Intrinsic::aarch64_sve_cmphs:
case Intrinsic::aarch64_sve_cmphi:
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide:
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide:
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_fcmpuo:
return true;
}
}
}
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
// AArch64 doesn't have comparisons that set GPRs, nor setcc instructions, so
// we have to make something up. Arbitrarily, choose ZeroOrOne.
setBooleanContents(ZeroOrOneBooleanContent);
// When comparing vectors the result sets the different elements in the
// vector to all-one or all-zero.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget->hasLS64()) {
addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
setOperationAction(ISD::STORE, MVT::i64x8, Custom);
}
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
}
if (Subtarget->hasNEON()) {
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
// Someone set us up the NEON.
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
addDRTypeForNEON(MVT::v4i16);
addDRTypeForNEON(MVT::v2i32);
addDRTypeForNEON(MVT::v1i64);
addDRTypeForNEON(MVT::v1f64);
addDRTypeForNEON(MVT::v4f16);
if (Subtarget->hasBF16())
addDRTypeForNEON(MVT::v4bf16);
addQRTypeForNEON(MVT::v4f32);
addQRTypeForNEON(MVT::v2f64);
addQRTypeForNEON(MVT::v16i8);
addQRTypeForNEON(MVT::v8i16);
addQRTypeForNEON(MVT::v4i32);
addQRTypeForNEON(MVT::v2i64);
addQRTypeForNEON(MVT::v8f16);
if (Subtarget->hasBF16())
addQRTypeForNEON(MVT::v8bf16);
}
if (Subtarget->hasSVE() || Subtarget->hasSME()) {
// Add legal sve predicate types
addRegisterClass(MVT::nxv1i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
// Add legal sve data types
addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
if (Subtarget->hasBF16()) {
addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
}
}
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
// Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::i64, Custom);
setOperationAction(ISD::SETCC, MVT::f16, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f80, Expand);
setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
// Custom lowering hooks are needed for XOR
// to fold it into CSINC/CSINV.
setOperationAction(ISD::XOR, MVT::i32, Custom);
setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand these
// operations when there's a valid register class, so we need custom lowering
// in most cases.
setOperationAction(ISD::FABS, MVT::f128, Expand);
setOperationAction(ISD::FADD, MVT::f128, LibCall);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOS, MVT::f128, Expand);
setOperationAction(ISD::FDIV, MVT::f128, LibCall);
setOperationAction(ISD::FMA, MVT::f128, Expand);
setOperationAction(ISD::FMUL, MVT::f128, LibCall);
setOperationAction(ISD::FNEG, MVT::f128, Expand);
setOperationAction(ISD::FPOW, MVT::f128, Expand);
setOperationAction(ISD::FREM, MVT::f128, Expand);
setOperationAction(ISD::FRINT, MVT::f128, Expand);
setOperationAction(ISD::FSIN, MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FSQRT, MVT::f128, Expand);
setOperationAction(ISD::FSUB, MVT::f128, LibCall);
setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// FIXME: f128 FMINIMUM and FMAXIMUM (including STRICT versions) currently
// aren't handled.
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
// Variable arguments.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Custom);
setOperationAction(ISD::VACOPY, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
// Variable-sized objects.
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
if (Subtarget->isTargetWindows())
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
// Constant pool entries
setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
// BlockAddress
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
// AArch64 lacks both left-rotate and popcount instructions.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
}
// AArch64 doesn't have i32 MULH{S|U}.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
// AArch64 doesn't have {U|S}MUL_LOHI.
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
setOperationAction(ISD::PARITY, MVT::i64, Custom);
setOperationAction(ISD::PARITY, MVT::i128, Custom);
setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ABS, MVT::i64, Custom);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i64, Expand);
// Custom lower Add/Sub/Mul with overflow.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::SSUBO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
if (Subtarget->hasFullFP16())
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
else
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
ISD::FEXP, ISD::FEXP2, ISD::FLOG,
ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
ISD::STRICT_FSIN, ISD::STRICT_FEXP, ISD::STRICT_FEXP2,
ISD::STRICT_FLOG, ISD::STRICT_FLOG2, ISD::STRICT_FLOG10}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
}
if (!Subtarget->hasFullFP16()) {
for (auto Op :
{ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::f16, Promote);
// Round-to-integer operations need custom lowering for fp16, as Promote
// doesn't work because the result type is integer.
for (auto Op : {ISD::STRICT_LROUND, ISD::STRICT_LLROUND, ISD::STRICT_LRINT,
ISD::STRICT_LLRINT})
setOperationAction(Op, MVT::f16, Custom);
// promote v4f16 to v4f32 when that is known to be safe.
setOperationAction(ISD::FADD, MVT::v4f16, Promote);
setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
setOperationAction(ISD::FABS, MVT::v4f16, Expand);
setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
setOperationAction(ISD::FABS, MVT::v8f16, Expand);
setOperationAction(ISD::FADD, MVT::v8f16, Expand);
setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
setOperationAction(ISD::FMA, MVT::v8f16, Expand);
setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (auto Op :
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL,
ISD::FRINT, ISD::FTRUNC, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::LROUND,
ISD::LLROUND, ISD::LRINT, ISD::LLRINT,
ISD::STRICT_FFLOOR, ISD::STRICT_FCEIL, ISD::STRICT_FNEARBYINT,
ISD::STRICT_FRINT, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FROUND, ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM,
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_LROUND,
ISD::STRICT_LLROUND, ISD::STRICT_LRINT, ISD::STRICT_LLRINT}) {
for (MVT Ty : {MVT::f32, MVT::f64})
setOperationAction(Op, Ty, Legal);
if (Subtarget->hasFullFP16())
setOperationAction(Op, MVT::f16, Legal);
}
// Basic strict FP operations are legal
for (auto Op : {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
ISD::STRICT_FDIV, ISD::STRICT_FMA, ISD::STRICT_FSQRT}) {
for (MVT Ty : {MVT::f32, MVT::f64})
setOperationAction(Op, Ty, Legal);
if (Subtarget->hasFullFP16())
setOperationAction(Op, MVT::f16, Legal);
}
// Strict conversion to a larger type is legal
for (auto VT : {MVT::f32, MVT::f64})
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
// Generate outline atomics library calls only if LSE was not specified for
// the subtarget
if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
#define LCALLNAMES(A, B, N) \
setLibcallName(A##N##_RELAX, #B #N "_relax"); \
setLibcallName(A##N##_ACQ, #B #N "_acq"); \
setLibcallName(A##N##_REL, #B #N "_rel"); \
setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
#define LCALLNAME4(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
#define LCALLNAME5(A, B) \
LCALLNAMES(A, B, 1) \
LCALLNAMES(A, B, 2) \
LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
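// As an illustration, LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
// expands to setLibcallName calls registering "__aarch64_swp1_relax",
// "__aarch64_swp1_acq", ... through "__aarch64_swp8_acq_rel" for the
// 1-, 2-, 4- and 8-byte variants.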
LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
#undef LCALLNAMES
#undef LCALLNAME4
#undef LCALLNAME5
}
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
// Aligned 128-bit loads and stores are single-copy atomic according to the
// v8.4a spec.
if (Subtarget->hasLSE2()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no unpaired non-temporal stores and
// legalization will break up 256-bit inputs.
setOperationAction(ISD::STORE, MVT::v32i8, Custom);
setOperationAction(ISD::STORE, MVT::v16i16, Custom);
setOperationAction(ISD::STORE, MVT::v16f16, Custom);
setOperationAction(ISD::STORE, MVT::v8i32, Custom);
setOperationAction(ISD::STORE, MVT::v8f32, Custom);
setOperationAction(ISD::STORE, MVT::v4f64, Custom);
setOperationAction(ISD::STORE, MVT::v4i64, Custom);
// Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
// This requires the Performance Monitors extension.
if (Subtarget->hasPerfMon())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
// Issue __sincos_stret if available.
setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
} else {
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
if (Subtarget->getTargetTriple().isOSMSVCRT()) {
// MSVCRT doesn't have powi; fall back to pow
setLibcallName(RTLIB::POWI_F32, nullptr);
setLibcallName(RTLIB::POWI_F64, nullptr);
}
// Make floating-point constants legal for the large code model, so they don't
// become loads from the constant pool.
if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
}
// AArch64 does not have floating-point extending loads, i1 sign-extending
// load, floating-point truncating stores, or v2i32->v2i16 truncating store.
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
}
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f80, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
// Indexed loads and stores are supported.
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::i8, Legal);
setIndexedLoadAction(im, MVT::i16, Legal);
setIndexedLoadAction(im, MVT::i32, Legal);
setIndexedLoadAction(im, MVT::i64, Legal);
setIndexedLoadAction(im, MVT::f64, Legal);
setIndexedLoadAction(im, MVT::f32, Legal);
setIndexedLoadAction(im, MVT::f16, Legal);
setIndexedLoadAction(im, MVT::bf16, Legal);
setIndexedStoreAction(im, MVT::i8, Legal);
setIndexedStoreAction(im, MVT::i16, Legal);
setIndexedStoreAction(im, MVT::i32, Legal);
setIndexedStoreAction(im, MVT::i64, Legal);
setIndexedStoreAction(im, MVT::f64, Legal);
setIndexedStoreAction(im, MVT::f32, Legal);
setIndexedStoreAction(im, MVT::f16, Legal);
setIndexedStoreAction(im, MVT::bf16, Legal);
}
// Trap.
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// We combine OR nodes for bitfield operations.
setTargetDAGCombine(ISD::OR);
// Try to create BICs for vector ANDs.
setTargetDAGCombine(ISD::AND);
// Vector add and sub nodes may conceal a high-half opportunity.
// Also, try to fold ADD into CSINC/CSINV.
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
ISD::UINT_TO_FP});
setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
ISD::FP_TO_UINT_SAT, ISD::FDIV});
// Try to combine setcc with csel.
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::MSTORE);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine({ISD::SELECT, ISD::VSELECT});
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
ISD::VECREDUCE_ADD, ISD::STEP_VECTOR});
setTargetDAGCombine({ISD::MGATHER, ISD::MSCATTER});
setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::GlobalAddress);
// In case of strict alignment, avoid an excessive number of byte-wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
MaxStoresPerMemcpy =
Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
MaxStoresPerMemmoveOptSize = 4;
MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
MaxLoadsPerMemcmp =
Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
EnableExtLdPromotion = true;
// Set required alignment.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
// Only change the limit for entries in a jump table if it is specified by
// the subtarget and not overridden on the command line.
unsigned MaxJT = STI.getMaximumJumpTableSize();
if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
setMaximumJumpTableSize(MaxJT);
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget->hasNEON()) {
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
for (auto Op :
{ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
ISD::FSQRT, ISD::FFLOOR, ISD::FNEARBYINT,
ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN,
ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM,
ISD::FMINIMUM, ISD::FMAXIMUM, ISD::STRICT_FADD,
ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
ISD::STRICT_FMA, ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR,
ISD::STRICT_FSQRT, ISD::STRICT_FRINT, ISD::STRICT_FNEARBYINT,
ISD::STRICT_FROUND, ISD::STRICT_FTRUNC, ISD::STRICT_FROUNDEVEN,
ISD::STRICT_FMINNUM, ISD::STRICT_FMAXNUM, ISD::STRICT_FMINIMUM,
ISD::STRICT_FMAXIMUM})
setOperationAction(Op, MVT::v1f64, Expand);
for (auto Op :
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP,
ISD::FP_ROUND, ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, ISD::MUL,
ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT,
ISD::STRICT_SINT_TO_FP, ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_ROUND})
setOperationAction(Op, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector -> f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
// Similarly, there are no direct i32 -> f64 or i32 -> f16 vector conversion
// instructions. Mark these as Custom so the conversion happens in two steps:
// v4i32 -> v4f32 -> v4f16
for (auto Op : {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::STRICT_SINT_TO_FP,
ISD::STRICT_UINT_TO_FP})
for (auto VT : {MVT::v2i32, MVT::v2i64, MVT::v4i32})
setOperationAction(Op, VT, Custom);
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
} else {
// When AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
}
setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
for (auto VT : {MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
}
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
}
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
MVT::v4i32}) {
setOperationAction(ISD::AVGFLOORS, VT, Legal);
setOperationAction(ISD::AVGFLOORU, VT, Legal);
setOperationAction(ISD::AVGCEILS, VT, Legal);
setOperationAction(ISD::AVGCEILU, VT, Legal);
setOperationAction(ISD::ABDS, VT, Legal);
setOperationAction(ISD::ABDU, VT, Legal);
}
// Vector reductions
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
}
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
// Likewise, narrowing and extending vector loads/stores aren't handled
// directly.
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
setOperationAction(ISD::MULHS, VT, Legal);
setOperationAction(ISD::MULHU, VT, Legal);
} else {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
}
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// AArch64 has implementations of a lot of rounding-like FP operations.
for (auto Op :
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
ISD::FROUND, ISD::FROUNDEVEN, ISD::STRICT_FFLOOR,
ISD::STRICT_FNEARBYINT, ISD::STRICT_FCEIL, ISD::STRICT_FRINT,
ISD::STRICT_FTRUNC, ISD::STRICT_FROUND, ISD::STRICT_FROUNDEVEN}) {
for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64})
setOperationAction(Op, Ty, Legal);
if (Subtarget->hasFullFP16())
for (MVT Ty : {MVT::v4f16, MVT::v8f16})
setOperationAction(Op, Ty, Legal);
}
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
// ADDP custom lowering
for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
setOperationAction(ISD::ADD, VT, Custom);
// FADDP custom lowering
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
setOperationAction(ISD::FADD, VT, Custom);
}
if (Subtarget->hasSME()) {
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
}
// FIXME: Move lowering for more nodes here if those are common between
// SVE and SME.
if (Subtarget->hasSVE() || Subtarget->hasSME()) {
for (auto VT :
{MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
}
}
if (Subtarget->hasSVE()) {
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ABDS, VT, Custom);
setOperationAction(ISD::ABDU, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
}
// Illegal unpacked integer vector types.
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
}
// Legalize unpacked bitcasts to REINTERPRET_CAST.
for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
MVT::nxv4bf16, MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
setOperationAction(ISD::BITCAST, VT, Custom);
for (auto VT :
{ MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
for (auto VT :
{MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
// There are no legal MVT::nxv16f## based types.
if (VT != MVT::nxv16i1) {
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
}
}
// NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
}
// First, mark all scalable-vector extending loads and truncating stores as
// Expand, covering both integer and floating-point scalable vectors.
for (MVT VT : MVT::scalable_vector_valuetypes()) {
for (MVT InnerVT : MVT::scalable_vector_valuetypes()) {
setTruncStoreAction(VT, InnerVT, Expand);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
}
}
// Then, selectively enable those which we directly support.
setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i8, Legal);
setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i16, Legal);
setTruncStoreAction(MVT::nxv2i64, MVT::nxv2i32, Legal);
setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i8, Legal);
setTruncStoreAction(MVT::nxv4i32, MVT::nxv4i16, Legal);
setTruncStoreAction(MVT::nxv8i16, MVT::nxv8i8, Legal);
for (auto Op : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i8, Legal);
setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i16, Legal);
setLoadExtAction(Op, MVT::nxv2i64, MVT::nxv2i32, Legal);
setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i8, Legal);
setLoadExtAction(Op, MVT::nxv4i32, MVT::nxv4i16, Legal);
setLoadExtAction(Op, MVT::nxv8i16, MVT::nxv8i8, Legal);
}
// SVE supports truncating stores of 64- and 128-bit vectors
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
MVT::nxv4f32, MVT::nxv2f64}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FPOWI, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETONE, VT, Expand);
}
for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
// NEON doesn't support integer divides, but SVE does
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
}
// NEON doesn't support 64-bit vector integer muls, but SVE does.
setOperationAction(ISD::MUL, MVT::v1i64, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
// 64-bit results can come from an input wider than NEON supports.
for (auto VT : {MVT::v8i8, MVT::v4i16})
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
// 128-bit results imply an input wider than NEON supports.
for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::TRUNCATE, VT, Custom);
for (auto VT : {MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::FP_ROUND, VT, Custom);
// These operations are not supported on NEON but SVE can do them.
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
// Int operations with no NEON support.
for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}
// FP operations with no NEON support.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
MVT::v1f64, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
// Use SVE for vectors with more than 2 elements.
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
}
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
}
if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
// Only required for llvm.aarch64.mops.memset.tag
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
}
void AArch64TargetLowering::addTypeForNEON(MVT VT) {
assert(VT.isVector() && "VT should be a vector type");
if (VT.isFloatingPoint()) {
MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
}
// Mark vector float intrinsics as expand.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
setOperationAction(ISD::FEXP, VT, Expand);
setOperationAction(ISD::FEXP2, VT, Expand);
}
// But we do support custom lowering for FCOPYSIGN.
if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
for (MVT InnerVT : MVT::all_valuetypes())
setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
// The CNT instruction only supports byte (B) element sizes, so for wider
// elements CTPOP is custom-lowered to CNT followed by UADDLP to widen.
if (VT != MVT::v8i8 && VT != MVT::v16i8)
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
for (unsigned Opcode :
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
ISD::FP_TO_UINT_SAT, ISD::STRICT_FP_TO_SINT, ISD::STRICT_FP_TO_UINT})
setOperationAction(Opcode, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
// [SU][MIN|MAX] are available for all NEON types apart from i64.
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
// F[MIN|MAX][NUM|NAN] and simple strict operations are available for all FP
// NEON types.
if (VT.isFloatingPoint() &&
VT.getVectorElementType() != MVT::bf16 &&
(VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
for (unsigned Opcode :
{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM,
ISD::STRICT_FMINIMUM, ISD::STRICT_FMAXIMUM, ISD::STRICT_FMINNUM,
ISD::STRICT_FMAXNUM, ISD::STRICT_FADD, ISD::STRICT_FSUB,
ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FMA,
ISD::STRICT_FSQRT})
setOperationAction(Opcode, VT, Legal);
// Strict fp extend and trunc are legal
if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 16)
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
if (VT.isFloatingPoint() && VT.getScalarSizeInBits() != 64)
setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
// FIXME: We could potentially make use of the vector comparison instructions
// for STRICT_FSETCC and STRICT_FSETCCS, but there are a number of
// complications:
// * FCMPEQ/NE are quiet comparisons, while the rest are signalling
//   comparisons, so we would need to expand when the condition code doesn't
//   match the kind of comparison.
// * Some kinds of comparison require more than one FCMXY instruction so
// would need to be expanded instead.
// * The lowering of the non-strict versions involves target-specific ISD
// nodes so we would likely need to add strict versions of all of them and
// handle them appropriately.
setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
if (Subtarget->isLittleEndian()) {
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
}
}
bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
EVT OpVT) const {
// Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
if (!Subtarget->hasSVE())
return true;
// We can only support legal predicate result types. We can use the SVE
// whilelo instruction for generating fixed-width predicates too.
if (ResVT != MVT::nxv2i1 && ResVT != MVT::nxv4i1 && ResVT != MVT::nxv8i1 &&
ResVT != MVT::nxv16i1 && ResVT != MVT::v2i1 && ResVT != MVT::v4i1 &&
ResVT != MVT::v8i1 && ResVT != MVT::v16i1)
return true;
// The whilelo instruction only works with i32 or i64 scalar inputs.
if (OpVT != MVT::i32 && OpVT != MVT::i64)
return true;
return false;
}
void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
if (VT.isFloatingPoint()) {
setCondCodeAction(ISD::SETO, VT, Expand);
setCondCodeAction(ISD::SETOLT, VT, Expand);
setCondCodeAction(ISD::SETLT, VT, Expand);
setCondCodeAction(ISD::SETOLE, VT, Expand);
setCondCodeAction(ISD::SETLE, VT, Expand);
setCondCodeAction(ISD::SETULT, VT, Expand);
setCondCodeAction(ISD::SETULE, VT, Expand);
setCondCodeAction(ISD::SETUGE, VT, Expand);
setCondCodeAction(ISD::SETUGT, VT, Expand);
setCondCodeAction(ISD::SETUEQ, VT, Expand);
setCondCodeAction(ISD::SETONE, VT, Expand);
}
// Mark integer truncating stores/extending loads as having custom lowering
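// (For example, VT == v4i32 covers the v4i8 and v4i16 variants.)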
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Mark floating-point truncating stores/extending loads as having custom
// lowering
if (VT.isFloatingPoint()) {
MVT InnerVT = VT.changeVectorElementType(MVT::f16);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
}
}
// Lower fixed length vector operations to scalable equivalents.
setOperationAction(ISD::ABS, VT, Custom);
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::BITCAST, VT, Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::FCEIL, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FFLOOR, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
setOperationAction(ISD::FMAXNUM, VT, Custom);
setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FMINNUM, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
setOperationAction(ISD::FNEARBYINT, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FP_EXTEND, VT, Custom);
setOperationAction(ISD::FP_ROUND, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
setOperationAction(ISD::FRINT, VT, Custom);
setOperationAction(ISD::FROUND, VT, Custom);
setOperationAction(ISD::FROUNDEVEN, VT, Custom);
setOperationAction(ISD::FSQRT, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::LOAD, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::SMAX, VT, Custom);
setOperationAction(ISD::SMIN, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::STORE, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::UMAX, VT, Custom);
setOperationAction(ISD::UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::XOR, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR64RegClass);
addTypeForNEON(VT);
}
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
addRegisterClass(VT, &AArch64::FPR128RegClass);
addTypeForNEON(VT);
}
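// Comparisons of scalars produce an i32 result; vector comparisons produce a
// vector of i1 with one lane per input element for scalable types, or an
// integer vector of matching element width for fixed-length types.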
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
LLVMContext &C, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
if (VT.isScalableVector())
return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
}
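// Try to rewrite the immediate of a logical operation (AND/OR/XOR) so that it
// becomes encodable as an AArch64 bitmask immediate (or all-zeros/all-ones),
// changing only bits that are not demanded by the instruction's users.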
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
const APInt &Demanded,
TargetLowering::TargetLoweringOpt &TLO,
unsigned NewOpc) {
uint64_t OldImm = Imm, NewImm, Enc;
uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
// Return if the immediate is already all zeros, all ones, a bimm32 or a
// bimm64.
if (Imm == 0 || Imm == Mask ||
AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
return false;
unsigned EltSize = Size;
uint64_t DemandedBits = Demanded.getZExtValue();
// Clear bits that are not demanded.
Imm &= DemandedBits;
while (true) {
// The goal here is to set the non-demanded bits in a way that minimizes
// the number of switching between 0 and 1. In order to achieve this goal,
// we set the non-demanded bits to the value of the preceding demanded bits.
// For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
// non-demanded bit), we copy bit0 (1) to the least significant 'x',
// bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
// The final result is 0b11000011.
uint64_t NonDemandedBits = ~DemandedBits;
uint64_t InvertedImm = ~Imm & DemandedBits;
uint64_t RotatedImm =
((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
NonDemandedBits;
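// RotatedImm marks the lowest bit of each run of non-demanded bits whose
// preceding demanded bit is 0. Adding it to NonDemandedBits carries through
// and clears exactly those runs, while runs preceded by a demanded 1 stay
// set; Ones therefore holds the non-demanded bits that must be set to 1.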
uint64_t Sum = RotatedImm + NonDemandedBits;
bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
uint64_t Ones = (Sum + Carry) & NonDemandedBits;
NewImm = (Imm | Ones) & Mask;
// If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
// or all-ones or all-zeros, in which case we can stop searching. Otherwise,
// we halve the element size and continue the search.
if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
break;
// We cannot shrink the element size any further if it is 2 bits.
if (EltSize == 2)
return false;
EltSize /= 2;
Mask >>= EltSize;
uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
// Return if there is a mismatch in any of the demanded bits of Imm and Hi.
if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
return false;
// Merge the upper and lower halves of Imm and DemandedBits.
Imm |= Hi;
DemandedBits |= DemandedBitsHi;
}
++NumOptimizedImms;
// Replicate the element across the register width.
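// For instance, with Size == 64 and EltSize == 16, the 16-bit pattern 0xA5A5
// becomes 0xA5A5A5A5A5A5A5A5.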
while (EltSize < Size) {
NewImm |= NewImm << EltSize;
EltSize *= 2;
}
(void)OldImm;
assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
"demanded bits should never be altered");
assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
// Create the new constant immediate node.
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue New;
// If the new constant immediate is all-zeros or all-ones, let the target
// independent DAG combine optimize this node.
if (NewImm == 0 || NewImm == OrigMask) {
New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
TLO.DAG.getConstant(NewImm, DL, VT));
// Otherwise, create a machine node so that target independent DAG combine
// doesn't undo this optimization.
} else {
Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
New = SDValue(
TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
}
return TLO.CombineTo(Op, New);
}
bool AArch64TargetLowering::targetShrinkDemandedConstant(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
TargetLoweringOpt &TLO) const {
// Delay this optimization until as late as possible.
if (!TLO.LegalOps)
return false;
if (!EnableOptimizeLogicalImm)
return false;
EVT VT = Op.getValueType();
if (VT.isVector())
return false;
unsigned Size = VT.getSizeInBits();
assert((Size == 32 || Size == 64) &&
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
if (DemandedBits.countPopulation() == Size)
return false;
unsigned NewOpc;
switch (Op.getOpcode()) {
default:
return false;
case ISD::AND:
NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
break;
case ISD::OR:
NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
break;
case ISD::XOR:
NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
break;
}
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!C)
return false;
uint64_t Imm = C->getZExtValue();
return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
}
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in Known.
void AArch64TargetLowering::computeKnownBitsForTargetNode(
const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
const SelectionDAG &DAG, unsigned Depth) const {
switch (Op.getOpcode()) {
default:
break;
case AArch64ISD::DUP: {
SDValue SrcOp = Op.getOperand(0);
Known = DAG.computeKnownBits(SrcOp, Depth + 1);
if (SrcOp.getValueSizeInBits() != Op.getScalarValueSizeInBits()) {
assert(SrcOp.getValueSizeInBits() > Op.getScalarValueSizeInBits() &&
"Expected DUP implicit truncation");
Known = Known.trunc(Op.getScalarValueSizeInBits());
}
break;
}
case AArch64ISD::CSEL: {
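// CSEL selects one of its first two operands, so only bits known in common
// between them are known in the result.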
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::commonBits(Known, Known2);
break;
}
case AArch64ISD::BICi: {
// Compute the bit cleared value.
uint64_t Mask =
~(Op->getConstantOperandVal(1) << Op->getConstantOperandVal(2));
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known &= KnownBits::makeConstant(APInt(Known.getBitWidth(), Mask));
break;
}
case AArch64ISD::VLSHR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::lshr(Known, Known2);
break;
}
case AArch64ISD::VASHR: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
Known = KnownBits::ashr(Known, Known2);
break;
}
case AArch64ISD::LOADgot:
case AArch64ISD::ADDlow: {
if (!Subtarget->isTargetILP32())
break;
// In ILP32 mode all valid pointers are in the low 4GB of the address space.
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
case AArch64ISD::ASSERT_ZEXT_BOOL: {
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
break;
}
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default: return;
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
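// Exclusive loads zero-extend the loaded value to the full register width,
// so every bit above the memory width is known to be zero.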
unsigned BitWidth = Known.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
}
break;
}
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID: {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntNo) {
default:
break;
case Intrinsic::aarch64_neon_umaxv:
case Intrinsic::aarch64_neon_uminv: {
// Figure out the datatype of the vector operand. The UMINV instruction
// will zero-extend the result, so we can mark as known zero all the
// bits above the element width. 32-bit or larger elements don't need
// this, as those are legal types and will be handled by isel directly.
MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
unsigned BitWidth = Known.getBitWidth();
if (VT == MVT::v8i8 || VT == MVT::v16i8) {
assert(BitWidth >= 8 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
Known.Zero |= Mask;
} else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
assert(BitWidth >= 16 && "Unexpected width!");
APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
Known.Zero |= Mask;
}
break;
}
}
}
}
}
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
return MVT::i64;
}
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
VT == MVT::v2i64;
}
return true;
}
// Same as above but handling LLTs instead.
bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
if (Subtarget->requiresStrictAlign())
return false;
if (Fast) {
// Some CPUs are fine with unaligned stores except for 128-bit ones.
*Fast = !Subtarget->isMisaligned128StoreSlow() ||
Ty.getSizeInBytes() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
// Code that uses clang vector extensions can mark that it
// wants unaligned accesses to be treated as fast by
// underspecifying alignment to be 1 or 2.
Alignment <= 2 ||
// Disregard v2i64. Memcpy lowering produces those and splitting
// them regresses performance on micro-benchmarks and olden/bh.
Ty == LLT::fixed_vector(2, 64);
}
return true;
}
FastISel *
AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
return AArch64::createFastISel(funcInfo, libInfo);
}
const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
#define MAKE_CASE(V) \
case V: \
return #V;
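// For example, MAKE_CASE(AArch64ISD::CALL) expands to:
//   case AArch64ISD::CALL: return "AArch64ISD::CALL";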
switch ((AArch64ISD::NodeType)Opcode) {
case AArch64ISD::FIRST_NUMBER:
break;
MAKE_CASE(AArch64ISD::CALL)
MAKE_CASE(AArch64ISD::ADRP)
MAKE_CASE(AArch64ISD::ADR)
MAKE_CASE(AArch64ISD::ADDlow)
MAKE_CASE(AArch64ISD::LOADgot)
MAKE_CASE(AArch64ISD::RET_FLAG)
MAKE_CASE(AArch64ISD::BRCOND)
MAKE_CASE(AArch64ISD::CSEL)
MAKE_CASE(AArch64ISD::CSINV)
MAKE_CASE(AArch64ISD::CSNEG)
MAKE_CASE(AArch64ISD::CSINC)
MAKE_CASE(AArch64ISD::THREAD_POINTER)
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
MAKE_CASE(AArch64ISD::ABDS_PRED)
MAKE_CASE(AArch64ISD::ABDU_PRED)
MAKE_CASE(AArch64ISD::MUL_PRED)
MAKE_CASE(AArch64ISD::MULHS_PRED)
MAKE_CASE(AArch64ISD::MULHU_PRED)
MAKE_CASE(AArch64ISD::SDIV_PRED)
MAKE_CASE(AArch64ISD::SHL_PRED)
MAKE_CASE(AArch64ISD::SMAX_PRED)
MAKE_CASE(AArch64ISD::SMIN_PRED)
MAKE_CASE(AArch64ISD::SRA_PRED)
MAKE_CASE(AArch64ISD::SRL_PRED)
MAKE_CASE(AArch64ISD::UDIV_PRED)
MAKE_CASE(AArch64ISD::UMAX_PRED)
MAKE_CASE(AArch64ISD::UMIN_PRED)
MAKE_CASE(AArch64ISD::SRAD_MERGE_OP1)
MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ADC)
MAKE_CASE(AArch64ISD::SBC)
MAKE_CASE(AArch64ISD::ADDS)
MAKE_CASE(AArch64ISD::SUBS)
MAKE_CASE(AArch64ISD::ADCS)
MAKE_CASE(AArch64ISD::SBCS)
MAKE_CASE(AArch64ISD::ANDS)
MAKE_CASE(AArch64ISD::CCMP)
MAKE_CASE(AArch64ISD::CCMN)
MAKE_CASE(AArch64ISD::FCCMP)
MAKE_CASE(AArch64ISD::FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMP)
MAKE_CASE(AArch64ISD::STRICT_FCMPE)
MAKE_CASE(AArch64ISD::DUP)
MAKE_CASE(AArch64ISD::DUPLANE8)
MAKE_CASE(AArch64ISD::DUPLANE16)
MAKE_CASE(AArch64ISD::DUPLANE32)
MAKE_CASE(AArch64ISD::DUPLANE64)
MAKE_CASE(AArch64ISD::DUPLANE128)
MAKE_CASE(AArch64ISD::MOVI)
MAKE_CASE(AArch64ISD::MOVIshift)
MAKE_CASE(AArch64ISD::MOVIedit)
MAKE_CASE(AArch64ISD::MOVImsl)
MAKE_CASE(AArch64ISD::FMOV)
MAKE_CASE(AArch64ISD::MVNIshift)
MAKE_CASE(AArch64ISD::MVNImsl)
MAKE_CASE(AArch64ISD::BICi)
MAKE_CASE(AArch64ISD::ORRi)
MAKE_CASE(AArch64ISD::BSP)
MAKE_CASE(AArch64ISD::EXTR)
MAKE_CASE(AArch64ISD::ZIP1)
MAKE_CASE(AArch64ISD::ZIP2)
MAKE_CASE(AArch64ISD::UZP1)
MAKE_CASE(AArch64ISD::UZP2)
MAKE_CASE(AArch64ISD::TRN1)
MAKE_CASE(AArch64ISD::TRN2)
MAKE_CASE(AArch64ISD::REV16)
MAKE_CASE(AArch64ISD::REV32)
MAKE_CASE(AArch64ISD::REV64)
MAKE_CASE(AArch64ISD::EXT)
MAKE_CASE(AArch64ISD::SPLICE)
MAKE_CASE(AArch64ISD::VSHL)
MAKE_CASE(AArch64ISD::VLSHR)
MAKE_CASE(AArch64ISD::VASHR)
MAKE_CASE(AArch64ISD::VSLI)
MAKE_CASE(AArch64ISD::VSRI)
MAKE_CASE(AArch64ISD::CMEQ)
MAKE_CASE(AArch64ISD::CMGE)
MAKE_CASE(AArch64ISD::CMGT)
MAKE_CASE(AArch64ISD::CMHI)
MAKE_CASE(AArch64ISD::CMHS)
MAKE_CASE(AArch64ISD::FCMEQ)
MAKE_CASE(AArch64ISD::FCMGE)
MAKE_CASE(AArch64ISD::FCMGT)
MAKE_CASE(AArch64ISD::CMEQz)
MAKE_CASE(AArch64ISD::CMGEz)
MAKE_CASE(AArch64ISD::CMGTz)
MAKE_CASE(AArch64ISD::CMLEz)
MAKE_CASE(AArch64ISD::CMLTz)
MAKE_CASE(AArch64ISD::FCMEQz)
MAKE_CASE(AArch64ISD::FCMGEz)
MAKE_CASE(AArch64ISD::FCMGTz)
MAKE_CASE(AArch64ISD::FCMLEz)
MAKE_CASE(AArch64ISD::FCMLTz)
MAKE_CASE(AArch64ISD::SADDV)
MAKE_CASE(AArch64ISD::UADDV)
MAKE_CASE(AArch64ISD::SDOT)
MAKE_CASE(AArch64ISD::UDOT)
MAKE_CASE(AArch64ISD::SMINV)
MAKE_CASE(AArch64ISD::UMINV)
MAKE_CASE(AArch64ISD::SMAXV)
MAKE_CASE(AArch64ISD::UMAXV)
MAKE_CASE(AArch64ISD::SADDV_PRED)
MAKE_CASE(AArch64ISD::UADDV_PRED)
MAKE_CASE(AArch64ISD::SMAXV_PRED)
MAKE_CASE(AArch64ISD::UMAXV_PRED)
MAKE_CASE(AArch64ISD::SMINV_PRED)
MAKE_CASE(AArch64ISD::UMINV_PRED)
MAKE_CASE(AArch64ISD::ORV_PRED)
MAKE_CASE(AArch64ISD::EORV_PRED)
MAKE_CASE(AArch64ISD::ANDV_PRED)
MAKE_CASE(AArch64ISD::CLASTA_N)
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::LS64_BUILD)
MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
MAKE_CASE(AArch64ISD::FADDV_PRED)
MAKE_CASE(AArch64ISD::FDIV_PRED)
MAKE_CASE(AArch64ISD::FMA_PRED)
MAKE_CASE(AArch64ISD::FMAX_PRED)
MAKE_CASE(AArch64ISD::FMAXV_PRED)
MAKE_CASE(AArch64ISD::FMAXNM_PRED)
MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
MAKE_CASE(AArch64ISD::FMIN_PRED)
MAKE_CASE(AArch64ISD::FMINV_PRED)
MAKE_CASE(AArch64ISD::FMINNM_PRED)
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
MAKE_CASE(AArch64ISD::RDSVL)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::BIT)
MAKE_CASE(AArch64ISD::CBZ)
MAKE_CASE(AArch64ISD::CBNZ)
MAKE_CASE(AArch64ISD::TBZ)
MAKE_CASE(AArch64ISD::TBNZ)
MAKE_CASE(AArch64ISD::TC_RETURN)
MAKE_CASE(AArch64ISD::PREFETCH)
MAKE_CASE(AArch64ISD::SITOF)
MAKE_CASE(AArch64ISD::UITOF)
MAKE_CASE(AArch64ISD::NVCAST)
MAKE_CASE(AArch64ISD::MRS)
MAKE_CASE(AArch64ISD::SQSHL_I)
MAKE_CASE(AArch64ISD::UQSHL_I)
MAKE_CASE(AArch64ISD::SRSHR_I)
MAKE_CASE(AArch64ISD::URSHR_I)
MAKE_CASE(AArch64ISD::SQSHLU_I)
MAKE_CASE(AArch64ISD::WrapperLarge)
MAKE_CASE(AArch64ISD::LD2post)
MAKE_CASE(AArch64ISD::LD3post)
MAKE_CASE(AArch64ISD::LD4post)
MAKE_CASE(AArch64ISD::ST2post)
MAKE_CASE(AArch64ISD::ST3post)
MAKE_CASE(AArch64ISD::ST4post)
MAKE_CASE(AArch64ISD::LD1x2post)
MAKE_CASE(AArch64ISD::LD1x3post)
MAKE_CASE(AArch64ISD::LD1x4post)
MAKE_CASE(AArch64ISD::ST1x2post)
MAKE_CASE(AArch64ISD::ST1x3post)
MAKE_CASE(AArch64ISD::ST1x4post)
MAKE_CASE(AArch64ISD::LD1DUPpost)
MAKE_CASE(AArch64ISD::LD2DUPpost)
MAKE_CASE(AArch64ISD::LD3DUPpost)
MAKE_CASE(AArch64ISD::LD4DUPpost)
MAKE_CASE(AArch64ISD::LD1LANEpost)
MAKE_CASE(AArch64ISD::LD2LANEpost)
MAKE_CASE(AArch64ISD::LD3LANEpost)
MAKE_CASE(AArch64ISD::LD4LANEpost)
MAKE_CASE(AArch64ISD::ST2LANEpost)
MAKE_CASE(AArch64ISD::ST3LANEpost)
MAKE_CASE(AArch64ISD::ST4LANEpost)
MAKE_CASE(AArch64ISD::SMULL)
MAKE_CASE(AArch64ISD::UMULL)
MAKE_CASE(AArch64ISD::FRECPE)
MAKE_CASE(AArch64ISD::FRECPS)
MAKE_CASE(AArch64ISD::FRSQRTE)
MAKE_CASE(AArch64ISD::FRSQRTS)
MAKE_CASE(AArch64ISD::STG)
MAKE_CASE(AArch64ISD::STZG)
MAKE_CASE(AArch64ISD::ST2G)
MAKE_CASE(AArch64ISD::STZ2G)
MAKE_CASE(AArch64ISD::SUNPKHI)
MAKE_CASE(AArch64ISD::SUNPKLO)
MAKE_CASE(AArch64ISD::UUNPKHI)
MAKE_CASE(AArch64ISD::UUNPKLO)
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::ST1_PRED)
MAKE_CASE(AArch64ISD::SST1_PRED)
MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
MAKE_CASE(AArch64ISD::STP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::ADDP)
MAKE_CASE(AArch64ISD::SADDLP)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
MAKE_CASE(AArch64ISD::MOPS_MEMSET)
MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
MAKE_CASE(AArch64ISD::CALL_BTI)
}
#undef MAKE_CASE
return nullptr;
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *MBB) const {
// We materialise the F128CSEL pseudo-instruction as some control flow and a
// phi node:
// OrigBB:
// [... previous instrs leading to comparison ...]
// b.ne TrueBB
// b EndBB
// TrueBB:
// ; Fallthrough
// EndBB:
// Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator It = ++MBB->getIterator();
Register DestReg = MI.getOperand(0).getReg();
Register IfTrueReg = MI.getOperand(1).getReg();
Register IfFalseReg = MI.getOperand(2).getReg();
unsigned CondCode = MI.getOperand(3).getImm();
bool NZCVKilled = MI.getOperand(4).isKill();
MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, TrueBB);
MF->insert(It, EndBB);
// Transfer the rest of the current basic block to EndBB.
EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
// TrueBB falls through to the end.
TrueBB->addSuccessor(EndBB);
if (!NZCVKilled) {
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
.addReg(IfTrueReg)
.addMBB(TrueBB)
.addReg(IfFalseReg)
.addMBB(MBB);
MI.eraseFromParent();
return EndBB;
}
MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
MachineInstr &MI, MachineBasicBlock *BB) const {
assert(!isAsynchronousEHPersonality(classifyEHPersonality(
BB->getParent()->getFunction().getPersonalityFn())) &&
"SEH does not use catchret!");
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.add(MI.getOperand(1)); // slice index register
MIB.add(MI.getOperand(2)); // slice index offset
MIB.add(MI.getOperand(3)); // pg
MIB.add(MI.getOperand(4)); // base
MIB.add(MI.getOperand(5)); // offset
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_ZA));
MIB.addReg(AArch64::ZA, RegState::Define);
MIB.add(MI.getOperand(0)); // Vector select register
MIB.add(MI.getOperand(1)); // Vector select offset
MIB.add(MI.getOperand(2)); // Base
MIB.add(MI.getOperand(1)); // Offset, same as vector select offset
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg,
MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1)); // pn
MIB.add(MI.getOperand(2)); // pm
MIB.add(MI.getOperand(3)); // zn
MIB.add(MI.getOperand(4)); // zm
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1)); // Slice index register
MIB.add(MI.getOperand(2)); // Slice index offset
MIB.add(MI.getOperand(3)); // pg
MIB.add(MI.getOperand(4)); // zn
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *
AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB =
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M));
MIB.add(MI.getOperand(0)); // Mask
unsigned Mask = MI.getOperand(0).getImm();
for (unsigned I = 0; I < 8; I++) {
if (Mask & (1 << I))
MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine);
}
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
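// Illustrative example for EmitZero: a ZERO_M_PSEUDO whose mask immediate is
// 0b01000001 has bits 0 and 6 set, so the expanded ZERO_M is marked as
// implicitly defining ZAD0 and ZAD6.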
MachineBasicBlock *
AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
MIB.addReg(BaseReg + MI.getOperand(0).getImm());
MIB.add(MI.getOperand(1)); // pn
MIB.add(MI.getOperand(2)); // pm
MIB.add(MI.getOperand(3)); // zn
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default:
#ifndef NDEBUG
MI.dump();
#endif
llvm_unreachable("Unexpected instruction for custom inserter!");
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
case TargetOpcode::STATEPOINT:
// STATEPOINT is a pseudo instruction which has no implicit defs/uses
// while the BL call instruction (to which the statepoint is eventually
// lowered) has an implicit def of LR. This def is early-clobber as it is
// set at the moment of the call, before any use is read.
// Add this implicit dead def here as a workaround.
MI.addOperand(*MI.getMF(),
MachineOperand::CreateReg(
AArch64::LR, /*isDef*/ true,
/*isImp*/ true, /*isKill*/ false, /*isDead*/ true,
/*isUndef*/ false, /*isEarlyClobber*/ true));
LLVM_FALLTHROUGH;
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_B, AArch64::ZAB0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_H:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_H, AArch64::ZAH0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_S:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_S, AArch64::ZAS0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_D:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_D, AArch64::ZAD0, MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_Q:
return EmitTileLoad(AArch64::LD1_MXIPXX_H_Q, AArch64::ZAQ0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_B:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_B, AArch64::ZAB0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_H:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_H, AArch64::ZAH0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_S:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_S, AArch64::ZAS0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_D:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_D, AArch64::ZAD0, MI, BB);
case AArch64::LD1_MXIPXX_V_PSEUDO_Q:
return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
case AArch64::LDR_ZA_PSEUDO:
return EmitFill(MI, BB);
case AArch64::BFMOPA_MPPZZ_PSEUDO:
return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::BFMOPS_MPPZZ_PSEUDO:
return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::FMOPAL_MPPZZ_PSEUDO:
return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::FMOPSL_MPPZZ_PSEUDO:
return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB);
case AArch64::FMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::FMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::FMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::FMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::UMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::UMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SUMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SUMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::USMOPA_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::USMOPS_MPPZZ_S_PSEUDO:
return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
case AArch64::SMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::UMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::UMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::USMOPA_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::USMOPS_MPPZZ_D_PSEUDO:
return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_H:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_H, AArch64::ZAH0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_S:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_S, AArch64::ZAS0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_D:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_D, AArch64::ZAD0, MI,
BB);
case AArch64::INSERT_MXIPZ_H_PSEUDO_Q:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_Q, AArch64::ZAQ0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_B:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_B, AArch64::ZAB0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_H:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_H, AArch64::ZAH0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_S:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_S, AArch64::ZAS0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_D:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_D, AArch64::ZAD0, MI,
BB);
case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
BB);
case AArch64::ZERO_M_PSEUDO:
return EmitZero(MI, BB);
case AArch64::ADDHA_MPPZ_PSEUDO_S:
return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
case AArch64::ADDVA_MPPZ_PSEUDO_S:
return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
case AArch64::ADDHA_MPPZ_PSEUDO_D:
return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
case AArch64::ADDVA_MPPZ_PSEUDO_D:
return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
}
}
//===----------------------------------------------------------------------===//
// AArch64 Lowering private implementation.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
// Forward declarations of SVE fixed length lowering helpers
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG);
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT);
/// isZerosVector - Check whether SDNode N is a zero-filled vector.
static bool isZerosVector(const SDNode *N) {
// Look through a bit convert.
while (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (ISD::isConstantSplatVectorAllZeros(N))
return true;
if (N->getOpcode() != AArch64ISD::DUP)
return false;
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
/// CC.
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unknown condition code!");
case ISD::SETNE:
return AArch64CC::NE;
case ISD::SETEQ:
return AArch64CC::EQ;
case ISD::SETGT:
return AArch64CC::GT;
case ISD::SETGE:
return AArch64CC::GE;
case ISD::SETLT:
return AArch64CC::LT;
case ISD::SETLE:
return AArch64CC::LE;
case ISD::SETUGT:
return AArch64CC::HI;
case ISD::SETUGE:
return AArch64CC::HS;
case ISD::SETULT:
return AArch64CC::LO;
case ISD::SETULE:
return AArch64CC::LS;
}
}
/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static void changeFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
llvm_unreachable("Unknown FP condition!");
case ISD::SETEQ:
case ISD::SETOEQ:
CondCode = AArch64CC::EQ;
break;
case ISD::SETGT:
case ISD::SETOGT:
CondCode = AArch64CC::GT;
break;
case ISD::SETGE:
case ISD::SETOGE:
CondCode = AArch64CC::GE;
break;
case ISD::SETOLT:
CondCode = AArch64CC::MI;
break;
case ISD::SETOLE:
CondCode = AArch64CC::LS;
break;
case ISD::SETONE:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GT;
break;
case ISD::SETO:
CondCode = AArch64CC::VC;
break;
case ISD::SETUO:
CondCode = AArch64CC::VS;
break;
case ISD::SETUEQ:
CondCode = AArch64CC::EQ;
CondCode2 = AArch64CC::VS;
break;
case ISD::SETUGT:
CondCode = AArch64CC::HI;
break;
case ISD::SETUGE:
CondCode = AArch64CC::PL;
break;
case ISD::SETLT:
case ISD::SETULT:
CondCode = AArch64CC::LT;
break;
case ISD::SETLE:
case ISD::SETULE:
CondCode = AArch64CC::LE;
break;
case ISD::SETNE:
case ISD::SETUNE:
CondCode = AArch64CC::NE;
break;
}
}
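// Illustrative example for changeFPCCToAArch64CC: SETONE maps to the pair
// (MI, GT), so an ordered not-equal test needs two conditional branches,
// roughly:
//   fcmp s0, s1
//   b.mi .Ltrue
//   b.gt .Ltrue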
/// Convert a DAG fp condition code to an AArch64 CC.
/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
/// should be AND'ed instead of OR'ed.
static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2) {
CondCode2 = AArch64CC::AL;
switch (CC) {
default:
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
assert(CondCode2 == AArch64CC::AL);
break;
case ISD::SETONE:
// (a one b)
// == ((a olt b) || (a ogt b))
// == ((a ord b) && (a une b))
CondCode = AArch64CC::VC;
CondCode2 = AArch64CC::NE;
break;
case ISD::SETUEQ:
// (a ueq b)
// == ((a uno b) || (a oeq b))
// == ((a ule b) && (a uge b))
CondCode = AArch64CC::PL;
CondCode2 = AArch64CC::LE;
break;
}
}
/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
/// CC usable with the vector instructions. Fewer operations are available
/// without a real NZCV register, so we have to use less efficient combinations
/// to get the same effect.
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
AArch64CC::CondCode &CondCode,
AArch64CC::CondCode &CondCode2,
bool &Invert) {
Invert = false;
switch (CC) {
default:
// Mostly the scalar mappings work fine.
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
break;
case ISD::SETUO:
Invert = true;
LLVM_FALLTHROUGH;
case ISD::SETO:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GE;
break;
case ISD::SETUEQ:
case ISD::SETULT:
case ISD::SETULE:
case ISD::SETUGT:
case ISD::SETUGE:
// All of the compare-mask comparisons are ordered, but we can switch
// between the two by a double inversion. E.g. ULE == !OGT.
Invert = true;
changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
CondCode, CondCode2);
break;
}
}
static bool isLegalArithImmed(uint64_t C) {
// Matches AArch64DAGToDAGISel::SelectArithImmed().
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
LLVM_DEBUG(dbgs() << "Is imm " << C
<< " legal: " << (IsLegal ? "yes\n" : "no\n"));
return IsLegal;
}
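// Worked examples for isLegalArithImmed (a 12-bit immediate, optionally
// shifted left by 12):
//   0xFFF    -> legal (fits in the low 12 bits)
//   0xFFF000 -> legal (low 12 bits clear, fits after LSL #12)
//   0x1001   -> illegal (needs bits from both halves)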
// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on the
// grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
// can be set differently by this operation. It comes down to whether
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are equal
// then everything is fine; if not, the optimization is wrong. Thus general
// comparisons are only valid if op2 != 0.
//
// So, finally, the only LLVM-native comparisons that don't mention C or V
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
// the absence of information about op2.
static bool isCMN(SDValue Op, ISD::CondCode CC) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
(CC == ISD::SETEQ || CC == ISD::SETNE);
}
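// Illustrative example for isCMN: (CMP x, (sub 0, y)) under SETEQ/SETNE may
// later be selected as "cmn x, y". Under, say, SETGE the rewrite would be
// unsound, since the V flag of "x - (-y)" can differ from that of "x + y".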
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
SelectionDAG &DAG, SDValue Chain,
bool IsSignaling) {
EVT VT = LHS.getValueType();
assert(VT != MVT::f128);
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
{Chain, LHS});
RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
{LHS.getValue(1), RHS});
Chain = RHS.getValue(1);
VT = MVT::f32;
}
unsigned Opcode =
IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
}
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG) {
EVT VT = LHS.getValueType();
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (VT.isFloatingPoint()) {
assert(VT != MVT::f128);
if (VT == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
VT = MVT::f32;
}
return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
}
// The CMP instruction is just an alias for SUBS, and representing it as
// SUBS means that it's possible to get CSE with subtract operations.
// A later phase can perform the optimization of setting the destination
// register to WZR/XZR if it ends up being unused.
unsigned Opcode = AArch64ISD::SUBS;
if (isCMN(RHS, CC)) {
// Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
RHS = RHS.getOperand(1);
} else if (isCMN(LHS, CC)) {
// As we are looking for EQ/NE compares, the operands can be commuted; can
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
Opcode = AArch64ISD::ADDS;
LHS = LHS.getOperand(1);
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
if (LHS.getOpcode() == ISD::AND) {
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
// of the signed comparisons.
const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
DAG.getVTList(VT, MVT_CC),
LHS.getOperand(0),
LHS.getOperand(1));
// Replace all users of (and X, Y) with newly generated (ands X, Y)
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
return ANDSNode.getValue(1);
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
// Use result of ANDS
return LHS.getValue(1);
}
}
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
.getValue(1);
}
/// \defgroup AArch64CCMP CMP;CCMP matching
///
/// These functions deal with the formation of CMP;CCMP;... sequences.
/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
/// a comparison. They set the NZCV flags to a predefined value if their
/// predicate is false. This allows expressing arbitrary conjunctions, for
/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
/// expressed as:
/// cmp A
/// ccmp B, inv(CB), CA
/// check for CB flags
///
/// This naturally lets us implement chains of AND operations with SETCC
/// operands. We can even implement some other situations by transforming
/// them:
/// - We can implement (NEG SETCC), i.e. negating a single comparison, by
/// negating the flags used in a CCMP/FCCMP operation.
/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
/// by negating the flags we test for afterwards; i.e.
/// NEG (CMP CCMP CCMP ...) can be implemented.
/// - Note that we can only ever negate all previously processed results.
/// What we cannot implement by flipping the flags to test is a negation
/// of two sub-trees (because the negation affects all sub-trees emitted so
/// far, so the 2nd sub-tree we emit would also affect the first).
/// With those tools we can implement some OR operations:
/// - (OR (SETCC A) (SETCC B)) can be implemented via:
/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
/// - After transforming OR to NEG/AND combinations we may be able to use NEG
/// elimination rules from earlier to implement the whole thing as a
/// CCMP/FCCMP chain.
///
/// As a complete example:
/// or (or (setCA (cmp A)) (setCB (cmp B)))
/// (and (setCC (cmp C)) (setCD (cmp D)))
/// can be reassociated to:
/// or (and (setCC (cmp C)) (setCD (cmp D)))
/// (or (setCA (cmp A)) (setCB (cmp B)))
/// can be transformed to:
/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
/// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
/// which can be implemented as:
/// cmp C
/// ccmp D, inv(CD), CC
/// ccmp A, CA, inv(CD)
/// ccmp B, CB, inv(CA)
/// check for CB flags
///
/// A counterexample is "or (and A B) (and C D)" which translates to
/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
/// can only implement one of the inner (not) operations, but not both!
/// @{
/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
ISD::CondCode CC, SDValue CCOp,
AArch64CC::CondCode Predicate,
AArch64CC::CondCode OutCC,
const SDLoc &DL, SelectionDAG &DAG) {
unsigned Opcode = 0;
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
if (LHS.getValueType().isFloatingPoint()) {
assert(LHS.getValueType() != MVT::f128);
if (LHS.getValueType() == MVT::f16 && !FullFP16) {
LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
}
Opcode = AArch64ISD::FCCMP;
} else if (RHS.getOpcode() == ISD::SUB) {
SDValue SubOp0 = RHS.getOperand(0);
if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
// See emitComparison() on why we can only do this for SETEQ and SETNE.
Opcode = AArch64ISD::CCMN;
RHS = RHS.getOperand(1);
}
}
if (Opcode == 0)
Opcode = AArch64ISD::CCMP;
SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
}
/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
/// expressed as a conjunction. See \ref AArch64CCMP.
/// \param CanNegate Set to true if we can negate the whole sub-tree just by
/// changing the conditions on the SETCC tests.
/// (this means we can call emitConjunctionRec() with
/// Negate==true on this sub-tree)
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
/// \param WillNegate Is true if we are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
bool &MustBeFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
if (Val->getOperand(0).getValueType() == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
return true;
}
// Protect against exponential runtime and stack overflow.
if (Depth > 6)
return false;
if (Opcode == ISD::AND || Opcode == ISD::OR) {
bool IsOR = Opcode == ISD::OR;
SDValue O0 = Val->getOperand(0);
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
return false;
bool CanNegateR;
bool MustBeFirstR;
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
return false;
if (MustBeFirstL && MustBeFirstR)
return false;
if (IsOR) {
// For an OR expression we need to be able to naturally negate at least
// one side or we cannot do the transformation at all.
if (!CanNegateL && !CanNegateR)
return false;
// If the result of the OR will be negated and we can naturally negate
// the leaves, then this sub-tree as a whole negates naturally.
CanNegate = WillNegate && CanNegateL && CanNegateR;
// If we cannot naturally negate the whole sub-tree, then this must be
// emitted first.
MustBeFirst = !CanNegate;
} else {
assert(Opcode == ISD::AND && "Must be OR or AND");
// We cannot naturally negate an AND operation.
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
return true;
}
return false;
}
/// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a
/// chain of CCMP/FCCMP ops. See @ref AArch64CCMP.
/// Tries to transform the given i1 producing node @p Val to a series of
/// compare and conditional compare operations. @returns an NZCV flags
/// producing node and sets @p OutCC to the flags that should be tested, or
/// returns SDValue() if the transformation was not possible.
/// \p Negate is true if we want this sub-tree to be negated just by changing
/// SETCC conditions.
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
AArch64CC::CondCode Predicate) {
// We're at a tree leaf, produce a conditional comparison operation.
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
SDValue LHS = Val->getOperand(0);
SDValue RHS = Val->getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
bool isInteger = LHS.getValueType().isInteger();
if (Negate)
CC = getSetCCInverse(CC, LHS.getValueType());
SDLoc DL(Val);
// Determine OutCC and handle FP special case.
if (isInteger) {
OutCC = changeIntCCToAArch64CC(CC);
} else {
assert(LHS.getValueType().isFloatingPoint());
AArch64CC::CondCode ExtraCC;
changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
// Some floating point conditions can't be tested with a single condition
// code. Construct an additional comparison in this case.
if (ExtraCC != AArch64CC::AL) {
SDValue ExtraCmp;
if (!CCOp.getNode())
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
else
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
ExtraCC, DL, DAG);
CCOp = ExtraCmp;
Predicate = ExtraCC;
}
}
// Produce a normal comparison if we are first in the chain
if (!CCOp)
return emitComparison(LHS, RHS, CC, DL, DAG);
// Otherwise produce a ccmp.
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
DAG);
}
assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
bool IsOR = Opcode == ISD::OR;
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
// Swap sub-tree that must come first to the right side.
if (MustBeFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
std::swap(MustBeFirstL, MustBeFirstR);
}
bool NegateR;
bool NegateAfterR;
bool NegateL;
bool NegateAfterAll;
if (Opcode == ISD::OR) {
// Swap the sub-tree that we can negate naturally to the left.
if (!CanNegateL) {
assert(CanNegateR && "at least one side must be negatable");
assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
assert(!Negate);
std::swap(LHS, RHS);
NegateR = false;
NegateAfterR = true;
} else {
// Negate the left sub-tree if possible, otherwise negate the result.
NegateR = CanNegateR;
NegateAfterR = !CanNegateR;
}
NegateL = true;
NegateAfterAll = !Negate;
} else {
assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
assert(!Negate && "Valid conjunction/disjunction tree");
NegateL = false;
NegateR = false;
NegateAfterR = false;
NegateAfterAll = false;
}
// Emit sub-trees.
AArch64CC::CondCode RHSCC;
SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
if (NegateAfterR)
RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
if (NegateAfterAll)
OutCC = AArch64CC::getInvertedCondCode(OutCC);
return CmpL;
}
/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
/// In some cases this is even possible with OR operations in the expression.
/// See \ref AArch64CCMP.
/// \see emitConjunctionRec().
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
}
/// @}
/// Returns how profitable it is to fold a comparison's operand's shift and/or
/// extension operations.
static unsigned getCmpOperandFoldingProfit(SDValue Op) {
auto isSupportedExtend = [&](SDValue V) {
if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
return true;
if (V.getOpcode() == ISD::AND)
if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = MaskCst->getZExtValue();
return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
}
return false;
};
if (!Op.hasOneUse())
return 0;
if (isSupportedExtend(Op))
return 1;
unsigned Opc = Op.getOpcode();
if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
uint64_t Shift = ShiftCst->getZExtValue();
if (isSupportedExtend(Op.getOperand(0)))
return (Shift <= 4) ? 2 : 1;
EVT VT = Op.getValueType();
if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
return 1;
}
return 0;
}
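// Illustrative profits (assuming single-use nodes): (shl (and x, 0xFF), 2)
// returns 2, since a supported zero-extension plus a shift of at most 4 can
// fold into the compare as an extended-register operand (e.g.
// "cmp w0, w1, uxtb #2"), while a plain (shl x, 3) on an i32 returns 1.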
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &AArch64cc, SelectionDAG &DAG,
const SDLoc &dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
EVT VT = RHS.getValueType();
uint64_t C = RHSC->getZExtValue();
if (!isLegalArithImmed(C)) {
// The constant does not fit; try adjusting it by one.
switch (CC) {
default:
break;
case ISD::SETLT:
case ISD::SETGE:
if ((VT == MVT::i32 && C != 0x80000000 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0x80000000ULL &&
isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
if ((VT == MVT::i32 && C != 0 &&
isLegalArithImmed((uint32_t)(C - 1))) ||
(VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETLE:
case ISD::SETGT:
if ((VT == MVT::i32 && C != INT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != INT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
if ((VT == MVT::i32 && C != UINT32_MAX &&
isLegalArithImmed((uint32_t)(C + 1))) ||
(VT == MVT::i64 && C != UINT64_MAX &&
isLegalArithImmed(C + 1ULL))) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
RHS = DAG.getConstant(C, dl, VT);
}
break;
}
}
}
// Comparisons are canonicalized so that the RHS operand is simpler than the
// LHS one, the extreme case being when RHS is an immediate. However, AArch64
// can fold some shift+extend operations on the RHS operand, so swap the
// operands if that can be done.
//
// For example:
// lsl w13, w11, #1
// cmp w13, w12
// can be turned into:
// cmp w12, w11, lsl #1
if (!isa<ConstantSDNode>(RHS) ||
!isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
std::swap(LHS, RHS);
CC = ISD::getSetCCSwappedOperands(CC);
}
}
SDValue Cmp;
AArch64CC::CondCode AArch64CC;
if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
// The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
// For an i8 operand the largest immediate, 255, can be encoded directly in
// the compare instruction. For an i16 operand, however, the largest
// immediate, 65535, cannot be encoded.
// Therefore, use a sign extending load and cmn to avoid materializing the
// -1 constant. For example,
// movz w1, #65535
// ldrh w0, [x0, #0]
// cmp w0, w1
// becomes:
// ldrsh w0, [x0, #0]
// cmn w0, #1
// Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
// if and only if (sext LHS) == (sext RHS). The checks are in place to
// ensure both the LHS and RHS are truly zero extended and to make sure the
// transformation is profitable.
if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
LHS.getNode()->hasNUsesOfValue(1, 0)) {
int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
SDValue SExt =
DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
DAG.getValueType(MVT::i16));
Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
RHS.getValueType()),
CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
}
if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
if ((CC == ISD::SETNE) ^ RHSC->isZero())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
}
if (!Cmp) {
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC = changeIntCCToAArch64CC(CC);
}
AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
return Cmp;
}
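// Example of the immediate adjustment above: "x < 4097" uses 0x1001, which is
// not a legal arithmetic immediate, so it is rewritten to "x <= 4096"
// (SETLT -> SETLE), where 0x1000 is encodable and a single CMP suffices.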
static std::pair<SDValue, SDValue>
getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
"Unsupported value type");
SDValue Value, Overflow;
SDLoc DL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
unsigned Opc = 0;
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unknown overflow instruction!");
case ISD::SADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::VS;
break;
case ISD::UADDO:
Opc = AArch64ISD::ADDS;
CC = AArch64CC::HS;
break;
case ISD::SSUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::VS;
break;
case ISD::USUBO:
Opc = AArch64ISD::SUBS;
CC = AArch64CC::LO;
break;
// Multiply needs a little bit of extra work.
case ISD::SMULO:
case ISD::UMULO: {
CC = AArch64CC::NE;
bool IsSigned = Op.getOpcode() == ISD::SMULO;
if (Op.getValueType() == MVT::i32) {
// Extend to 64-bits, then perform a 64-bit multiply.
unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
// Check that the result fits into a 32-bit integer.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
if (IsSigned) {
// cmp xreg, wreg, sxtw
SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
} else {
// tst xreg, #0xffffffff00000000
SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
Overflow =
DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
}
break;
}
assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
// For the 64-bit multiply, detect overflow via the high half of the product.
Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
if (IsSigned) {
SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
DAG.getConstant(63, DL, MVT::i64));
// It is important that LowerBits is last, otherwise the arithmetic
// shift will not be folded into the compare (SUBS).
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
.getValue(1);
} else {
SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
Overflow =
DAG.getNode(AArch64ISD::SUBS, DL, VTs,
DAG.getConstant(0, DL, MVT::i64),
UpperBits).getValue(1);
}
break;
}
} // switch (...)
if (Opc) {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
// Emit the AArch64 operation with overflow check.
Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
Overflow = Value.getValue(1);
}
return std::make_pair(Value, Overflow);
}
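// For instance, an i32 ISD::UADDO is emitted as ADDS with CC = HS (carry set
// on unsigned overflow), while an i32 ISD::SMULO multiplies in 64 bits and
// compares the product against its own sign-extension, roughly:
//   smull x8, w0, w1
//   cmp x8, w8, sxtw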
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
SDValue Other = Op.getOperand(1);
SDLoc dl(Sel);
// If the operand is an overflow checking operation, invert the condition
// code and kill the Not operation. I.e., transform:
// (xor (overflow_op_bool, 1))
// -->
// (csel 1, 0, invert(cc), overflow_op_bool)
// ... which later gets transformed to just a cset instruction with an
// inverted condition code, rather than a cset + eor sequence.
if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
return SDValue();
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
AArch64CC::CondCode CC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// If neither operand is a SELECT_CC, give up.
if (Sel.getOpcode() != ISD::SELECT_CC)
std::swap(Sel, Other);
if (Sel.getOpcode() != ISD::SELECT_CC)
return Op;
// The folding we want to perform is:
// (xor x, (select_cc a, b, cc, 0, -1) )
// -->
// (csel x, (xor x, -1), cc ...)
//
// The latter will get matched to a CSINV instruction.
ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
SDValue LHS = Sel.getOperand(0);
SDValue RHS = Sel.getOperand(1);
SDValue TVal = Sel.getOperand(2);
SDValue FVal = Sel.getOperand(3);
// FIXME: This could be generalized to non-integer comparisons.
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return Op;
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
// The values aren't constants, this isn't the pattern we're looking for.
if (!CFVal || !CTVal)
return Op;
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
if (CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
FVal = Other;
TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
DAG.getConstant(-1ULL, dl, Other.getValueType()));
return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
CCVal, Cmp);
}
return Op;
}
// If Invert is false, sets 'C' bit of NZCV to 0 if value is 0, else sets 'C'
// bit to 1. If Invert is true, sets 'C' bit of NZCV to 1 if value is 0, else
// sets 'C' bit to 0.
static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
SDLoc DL(Value);
EVT VT = Value.getValueType();
SDValue Op0 = Invert ? DAG.getConstant(0, DL, VT) : Value;
SDValue Op1 = Invert ? Value : DAG.getConstant(1, DL, VT);
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::Glue), Op0, Op1);
return Cmp.getValue(1);
}
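// Why this works: for unsigned SUBS, C is set iff no borrow occurs. With
// Invert == false we compute Value - 1, so C is set iff Value >= 1, i.e.
// Value is non-zero; with Invert == true we compute 0 - Value, so C is set
// iff Value == 0.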
// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
bool Invert) {
assert(Flag.getResNo() == 1);
SDLoc DL(Flag);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
// Value is 1 if 'V' bit of NZCV is 1, else 0
static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) {
assert(Flag.getResNo() == 1);
SDLoc DL(Flag);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
}
// This lowering is inefficient, but it will get cleaned up by
// `foldOverflowCheck`.
static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
bool IsSigned) {
EVT VT0 = Op.getValue(0).getValueType();
EVT VT1 = Op.getValue(1).getValueType();
if (VT0 != MVT::i32 && VT0 != MVT::i64)
return SDValue();
bool InvertCarry = Opcode == AArch64ISD::SBCS;
SDValue OpLHS = Op.getOperand(0);
SDValue OpRHS = Op.getOperand(1);
SDValue OpCarryIn = valueToCarryFlag(Op.getOperand(2), DAG, InvertCarry);
SDLoc DL(Op);
SDVTList VTs = DAG.getVTList(VT0, VT1);
SDValue Sum = DAG.getNode(Opcode, DL, DAG.getVTList(VT0, MVT::Glue), OpLHS,
OpRHS, OpCarryIn);
SDValue OutFlag =
IsSigned ? overflowFlagToValue(Sum.getValue(1), VT1, DAG)
: carryFlagToValue(Sum.getValue(1), VT1, DAG, InvertCarry);
return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Sum, OutFlag);
}
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
// Let legalize expand this if it isn't a legal type yet.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
return SDValue();
SDLoc dl(Op);
AArch64CC::CondCode CC;
// The actual operation that sets the overflow or carry flag.
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
// We use an inverted condition, because the conditional select is inverted
// too. This will allow it to be selected to a single instruction:
// CSINC Wd, WZR, WZR, invert(cond).
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
CCVal, Overflow);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
}
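// e.g. an i32 ISD::UADDO is expected to lower to something like:
//   adds w8, w0, w1
//   cset w9, hs  ; CSINC of WZR with the inverted condition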
// Prefetch operands are:
// 1: Address to prefetch
// 2: bool isWrite
// 3: int locality (0 = no locality ... 3 = extreme locality)
// 4: bool isDataCache
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
bool IsStream = !Locality;
// When the locality number is set
if (Locality) {
// The front-end should have filtered out the out-of-range values
assert(Locality <= 3 && "Prefetch locality out-of-range");
// The locality degree is the inverse of the cache level, and the
// encoding starts at 0 for level 1, so flip the number around.
Locality = 3 - Locality;
}
// Build the mask value encoding the expected behavior.
unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
(!IsData << 3) | // IsDataCache bit
(Locality << 1) | // Cache level bits
(unsigned)IsStream; // Stream bit
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
}
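// Example PrfOp encodings: a data read with locality 3 (keep in L1) yields
// 0b00000, i.e. PLDL1KEEP; a data read with locality 0 sets the stream bit
// instead, yielding 0b00001, i.e. PLDL1STRM.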
SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
return SDValue();
}
SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
if (useSVEForFixedLengthVectorVT(SrcVT))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
// Expand cases where the input is a vector bigger than NEON.
if (useSVEForFixedLengthVectorVT(SrcVT))
return SDValue();
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
bool IsStrict = Op->isStrictFPOpcode();
EVT InVT = Op.getOperand(IsStrict ? 1 : 0).getValueType();
EVT VT = Op.getValueType();
if (VT.isScalableVector()) {
unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
? AArch64ISD::FCVTZU_MERGE_PASSTHRU
: AArch64ISD::FCVTZS_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (InVT.getVectorElementType() == MVT::f16 &&
!Subtarget->hasFullFP16()) {
MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
SDLoc dl(Op);
if (IsStrict) {
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
}
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
SDLoc dl(Op);
if (IsStrict) {
InVT = InVT.changeVectorElementTypeToInteger();
SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
}
SDValue Cv =
DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
Op.getOperand(0));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
}
if (VTSize > InVTSize) {
SDLoc dl(Op);
MVT ExtVT =
MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
VT.getVectorNumElements());
if (IsStrict) {
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
// Use a scalar operation for conversions between single-element vectors of
// the same size.
if (NumElts == 1) {
SDLoc dl(Op);
SDValue Extract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
EVT ScalarVT = VT.getScalarType();
if (IsStrict)
return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
{Op.getOperand(0), Extract});
return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
}
// Type changing conversions are illegal.
return Op;
}
SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
if (SrcVal.getValueType().isVector())
return LowerVectorFP_TO_INT(Op, DAG);
// f16 conversions are promoted to f32 when full fp16 is not supported.
if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
if (IsStrict) {
SDValue Ext =
DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
{Op.getOperand(0), SrcVal});
return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
return DAG.getNode(
Op.getOpcode(), dl, Op.getValueType(),
DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
}
if (SrcVal.getValueType() != MVT::f128) {
// It's legal except when f128 is involved
return Op;
}
return SDValue();
}
SDValue
AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination element size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
assert(SatWidth <= DstElementWidth &&
"Saturation width cannot exceed result width");
// TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
// Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
// types, so this is hard to reach.
if (DstVT.isScalableVector())
return SDValue();
EVT SrcElementVT = SrcVT.getVectorElementType();
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
if (SrcElementVT == MVT::f16 &&
(!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
SrcVT = F32VT;
SrcElementVT = MVT::f32;
SrcElementWidth = 32;
} else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
SrcElementVT != MVT::f16)
return SDValue();
SDLoc DL(Op);
// Cases that we can emit directly.
if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
DAG.getValueType(DstVT.getScalarType()));
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
// width. For double, as we don't have MIN/MAX, it can be simpler to scalarize
// (at least until sqxtn is selected).
if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
return SDValue();
EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
DAG.getValueType(IntVT.getScalarType()));
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
}
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
EVT SrcVT = SrcVal.getValueType();
if (SrcVT.isVector())
return LowerVectorFP_TO_INT_SAT(Op, DAG);
EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
// In the absence of FP16 support, promote f16 to f32 and saturate the result.
if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
SrcVT = MVT::f32;
} else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
return SDValue();
SDLoc DL(Op);
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
DAG.getValueType(DstVT));
// Otherwise we emit a cvt that saturates to a higher BW, and saturate the
// result. This is only valid if the legal cvt is larger than the saturate
// width.
if (DstWidth < SatWidth)
return SDValue();
SDValue NativeCvt =
DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
SDValue Sat;
if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
SDValue MinC = DAG.getConstant(
APInt::getSignedMaxValue(SatWidth).sext(DstWidth), DL, DstVT);
SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
SDValue MaxC = DAG.getConstant(
APInt::getSignedMinValue(SatWidth).sext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
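// In the common case the saturating conversion is a single instruction: e.g.
// an i32-saturating fptosi of an f32 becomes "fcvtzs w0, s0", since the
// AArch64 conversion instructions already saturate to the register width.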
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
// Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
// Any additional optimization in this function should be recorded
// in the cost tables.
bool IsStrict = Op->isStrictFPOpcode();
EVT VT = Op.getValueType();
SDLoc dl(Op);
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
EVT InVT = In.getValueType();
unsigned Opc = Op.getOpcode();
bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
if (VT.isScalableVector()) {
if (InVT.getVectorElementType() == MVT::i1) {
// We can't directly extend an SVE predicate; extend it first.
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = getPromotedVTForPredicate(InVT);
In = DAG.getNode(CastOpc, dl, CastVT, In);
return DAG.getNode(Opc, dl, VT, In);
}
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
return LowerToPredicatedOp(Op, DAG, Opcode);
}
if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
uint64_t InVTSize = InVT.getFixedSizeInBits();
if (VTSize < InVTSize) {
MVT CastVT =
MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
InVT.getVectorNumElements());
if (IsStrict) {
In = DAG.getNode(Opc, dl, {CastVT, MVT::Other},
{Op.getOperand(0), In});
return DAG.getNode(
ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
{In.getValue(1), In.getValue(0), DAG.getIntPtrConstant(0, dl)});
}
In = DAG.getNode(Opc, dl, CastVT, In);
return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
}
if (VTSize > InVTSize) {
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
EVT CastVT = VT.changeVectorElementTypeToInteger();
In = DAG.getNode(CastOpc, dl, CastVT, In);
if (IsStrict)
return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
return DAG.getNode(Opc, dl, VT, In);
}
// Use a scalar operation for conversions between single-element vectors of
// the same size.
if (VT.getVectorNumElements() == 1) {
SDValue Extract = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
In, DAG.getConstant(0, dl, MVT::i64));
EVT ScalarVT = VT.getScalarType();
if (IsStrict)
return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
{Op.getOperand(0), Extract});
return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
}
return Op;
}
SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVectorINT_TO_FP(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
// f16 conversions are promoted to f32 when full fp16 is not supported.
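// For example (illustrative): a sint_to_fp from i32 to f16 without +fullfp16
// is emitted roughly as "scvtf s0, w0" followed by "fcvt h0, s0": the
// conversion is performed at f32 and the result is then rounded to f16.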
if (Op.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
SDLoc dl(Op);
if (IsStrict) {
SDValue Val = DAG.getNode(Op.getOpcode(), dl, {MVT::f32, MVT::Other},
{Op.getOperand(0), SrcVal});
return DAG.getNode(
ISD::STRICT_FP_ROUND, dl, {MVT::f16, MVT::Other},
{Val.getValue(1), Val.getValue(0), DAG.getIntPtrConstant(0, dl)});
}
return DAG.getNode(
ISD::FP_ROUND, dl, MVT::f16,
DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
DAG.getIntPtrConstant(0, dl));
}
// i128 conversions are libcalls.
if (SrcVal.getValueType() == MVT::i128)
return SDValue();
// Other conversions are legal, unless the result type is the completely
// software-based fp128.
if (Op.getValueType() != MVT::f128)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
SelectionDAG &DAG) const {
// For iOS, we want to call an alternative entry point: __sincos_stret,
// which returns the values in two S / D registers.
SDLoc dl(Op);
SDValue Arg = Op.getOperand(0);
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
Entry.IsSExt = false;
Entry.IsZExt = false;
Args.push_back(Entry);
RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
: RTLIB::SINCOS_STRET_F32;
const char *LibcallName = getLibcallName(LC);
SDValue Callee =
DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
StructType *RetTy = StructType::get(ArgTy, ArgTy);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
static MVT getSVEContainerType(EVT ContentTy);
SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT OpVT = Op.getValueType();
EVT ArgVT = Op.getOperand(0).getValueType();
if (useSVEForFixedLengthVectorVT(OpVT))
return LowerFixedLengthBitcastToSVE(Op, DAG);
if (OpVT.isScalableVector()) {
// Bitcasting between unpacked vector types of different element counts is
// not a NOP because the live elements are laid out differently.
// 01234567
// e.g. nxv2i32 = XX??XX??
// nxv4f16 = X?X?X?X?
if (OpVT.getVectorElementCount() != ArgVT.getVectorElementCount())
return SDValue();
if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
"Expected int->fp bitcast!");
SDValue ExtResult =
DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
Op.getOperand(0));
return getSVESafeBitCast(OpVT, ExtResult, DAG);
}
return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
}
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
// Bitcasts between f16 and bf16 are legal.
if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
return Op;
assert(ArgVT == MVT::i16);
SDLoc DL(Op);
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
return SDValue(
DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
if (OrigVT.getSizeInBits() >= 64)
return OrigVT;
assert(OrigVT.isSimple() && "Expecting a simple value type");
MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
switch (OrigSimpleTy) {
default: llvm_unreachable("Unexpected Vector Type");
case MVT::v2i8:
case MVT::v2i16:
return MVT::v2i32;
case MVT::v4i8:
return MVT::v4i16;
}
}
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
const EVT &OrigTy,
const EVT &ExtTy,
unsigned ExtOpcode) {
// The vector originally had a size of OrigTy. It was then extended to ExtTy.
// We expect ExtTy to be 128 bits total. If OrigTy is less than 64 bits, we
// need to insert a new extension so that the operand becomes 64 bits.
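// For example (illustrative): a v4i8 operand that was extended straight to a
// 128-bit v4i32 is given a fresh v4i8 -> v4i16 extension here, so that
// SMULL/UMULL can widen the 64-bit v4i16 inputs into the v4i32 result.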
assert(ExtTy.is128BitVector() && "Unexpected extension size");
if (OrigTy.getSizeInBits() >= 64)
return N;
// Must extend size to at least 64 bits to be used as an operand for VMULL.
EVT NewVT = getExtensionTo64Bits(OrigTy);
return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
}
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
bool isSigned) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::BUILD_VECTOR)
return false;
for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
}
return false;
}
return true;
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
if (N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
N->getOpcode());
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
SDLoc dl(N);
unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
const APInt &CInt = C->getAPIntValue();
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
}
return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
}
static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::SIGN_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, true);
}
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
return N->getOpcode() == ISD::ZERO_EXTEND ||
N->getOpcode() == ISD::ANY_EXTEND ||
isExtendedBUILD_VECTOR(N, DAG, false);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
}
return false;
}
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
unsigned Opcode = N->getOpcode();
if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
SDNode *N0 = N->getOperand(0).getNode();
SDNode *N1 = N->getOperand(1).getNode();
return N0->hasOneUse() && N1->hasOneUse() &&
isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
}
return false;
}
SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
// The rounding mode is in bits 23:22 of the FPCR.
// The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
// The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
// so that the shift + and get folded into a bitfield extract.
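// Worked example (illustrative): with FPCR[23:22] == 0b11 (round toward
// zero), adding 1 << 22 wraps the two-bit field to 0b00, so the shift and
// mask below yield FLT_ROUNDS value 0, matching the 3->0 entry above.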
SDLoc dl(Op);
SDValue Chain = Op.getOperand(0);
SDValue FPCR_64 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
{Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
Chain = FPCR_64.getValue(1);
SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
DAG.getConstant(1U << 22, dl, MVT::i32));
SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
DAG.getConstant(22, dl, MVT::i32));
SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
DAG.getConstant(3, dl, MVT::i32));
return DAG.getMergeValues({AND, Chain}, dl);
}
SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
// The rounding mode is in bits 23:22 of the FPCR.
// The llvm.set.rounding argument value to the rounding mode in FPCR mapping
// is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
// (((arg - 1) & 3) << 22).
//
// The argument of llvm.set.rounding must be within the range [0, 3], so
// NearestTiesToAway (4) is not handled here. It is the responsibility of the
// code that generates llvm.set.rounding to ensure this condition.
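//
// Worked example (illustrative): llvm.set.rounding(0), i.e. round toward
// zero, computes ((0 - 1) & 3) == 3, which is FPCR rounding mode 0b11 (RZ).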
// Calculate new value of FPCR[23:22].
RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
DAG.getConstant(1, DL, MVT::i32));
RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
DAG.getConstant(0x3, DL, MVT::i32));
RMValue =
DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
// Get current value of FPCR.
SDValue Ops[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
SDValue FPCR =
DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
Chain = FPCR.getValue(1);
FPCR = FPCR.getValue(0);
// Put the new rounding mode into FPCR[23:22].
const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
DAG.getConstant(RMMask, DL, MVT::i64));
FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
SDValue Ops2[] = {
Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
FPCR};
return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
}
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// If SVE is available then i64 vector multiplications can also be made legal.
bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
// Multiplications are only custom-lowered for 128-bit vectors so that
// VMULL can be detected. Otherwise v2i64 multiplications are not legal.
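// For example (illustrative): mul(v2i64 (sext v2i32 a), v2i64 (sext v2i32 b))
// is matched to AArch64ISD::SMULL below, i.e. a single
// "smull v0.2d, v0.2s, v1.2s".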
assert(VT.is128BitVector() && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
unsigned NewOpc = 0;
bool isMLA = false;
bool isN0SExt = isSignExtended(N0, DAG);
bool isN1SExt = isSignExtended(N1, DAG);
if (isN0SExt && isN1SExt)
NewOpc = AArch64ISD::SMULL;
else {
bool isN0ZExt = isZeroExtended(N0, DAG);
bool isN1ZExt = isZeroExtended(N1, DAG);
if (isN0ZExt && isN1ZExt)
NewOpc = AArch64ISD::UMULL;
else if (isN1SExt || isN1ZExt) {
// Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
// into (s/zext A * s/zext C) + (s/zext B * s/zext C)
if (isN1SExt && isAddSubSExt(N0, DAG)) {
NewOpc = AArch64ISD::SMULL;
isMLA = true;
} else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
NewOpc = AArch64ISD::UMULL;
isMLA = true;
} else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
std::swap(N0, N1);
NewOpc = AArch64ISD::UMULL;
isMLA = true;
}
}
if (!NewOpc) {
if (VT == MVT::v2i64)
// Fall through to expand this. It is not legal.
return SDValue();
else
// Other vector multiplications are legal.
return Op;
}
}
// Legalize to a S/UMULL instruction
SDLoc DL(Op);
SDValue Op0;
SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
if (!isMLA) {
Op0 = skipExtensionForVectorMULL(N0, DAG);
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
}
// Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering to take advantage of the no-stall back-to-back s/umull +
// s/umlal sequence on CPUs with accumulate forwarding, such as Cortex-A53/A57.
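// For example (illustrative): (zext v8i8 A + zext v8i8 B) * (zext v8i8 C)
// becomes add(UMULL(A, C), UMULL(B, C)), which instruction selection can
// match to a back-to-back umull + umlal pair.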
SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
return DAG.getNode(N0->getOpcode(), DL, VT,
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
DAG.getNode(NewOpc, DL, VT,
DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
}
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
int Pattern) {
if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
return DAG.getConstant(1, DL, MVT::nxv1i1);
return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
// Returns a safe bitcast between two scalable vector predicates, where
// any newly created lanes from a widening bitcast are defined as zero.
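// For example (illustrative): casting nxv2i1 to nxv16i1 exposes the lanes
// sitting between the original elements, so the reinterpreted value is ANDed
// with a reinterpreted all-active nxv2i1 mask to force those new lanes to
// zero, unless they are already known zero by construction.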
static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
assert(InVT.getVectorElementType() == MVT::i1 &&
VT.getVectorElementType() == MVT::i1 &&
"Expected a predicate-to-predicate bitcast");
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
InVT.isScalableVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(InVT) &&
"Only expect to cast between legal scalable predicate types!");
// Return the operand if the cast isn't changing type,
// e.g. <vscale x 16 x i1> -> <vscale x 16 x i1>
if (InVT == VT)
return Op;
SDValue Reinterpret = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
// We only have to zero the lanes if new lanes are being defined, e.g. when
// casting from <vscale x 2 x i1> to <vscale x 16 x i1>. If this is not the
// case (e.g. when casting from <vscale x 16 x i1> -> <vscale x 2 x i1>) then
// we can return here.
if (InVT.bitsGT(VT))
return Reinterpret;
// Check if the other lanes are already known to be zeroed by
// construction.
if (isZeroingInactiveLanes(Op))
return Reinterpret;
// Zero the newly introduced lanes.
SDValue Mask = DAG.getConstant(1, DL, InVT);
Mask = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Mask);
return DAG.getNode(ISD::AND, DL, VT, Reinterpret, Mask);
}
SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
SDLoc DL(Op);
switch (IntNo) {
default:
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::aarch64_mops_memset_tag: {
auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Chain = Node->getChain();
SDValue Dst = Op.getOperand(2);
SDValue Val = Op.getOperand(3);
Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
SDValue Size = Op.getOperand(4);
auto Alignment = Node->getMemOperand()->getAlign();
bool IsVol = Node->isVolatile();
auto DstPtrInfo = Node->getPointerInfo();
const auto &SDI =
static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
SDValue MS =
SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
// MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
// intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
// LowerOperationWrapper will complain that the number of results has
// changed.
return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
}
case Intrinsic::aarch64_sme_get_pstatesm: {
SDValue Chain = Op.getOperand(0);
SDValue MRS = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
Chain, DAG.getConstant(AArch64SysReg::SVCR, DL, MVT::i64));
SDValue Mask = DAG.getConstant(/* PSTATE.SM */ 1, DL, MVT::i64);
SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, MRS, Mask);
return DAG.getMergeValues({And, Chain}, DL);
}
}
}
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
}
case Intrinsic::aarch64_neon_abs: {
EVT Ty = Op.getValueType();
if (Ty == MVT::i64) {
SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
Op.getOperand(1));
Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
} else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
} else {
report_fatal_error("Unexpected type for AArch64 NEON intrinic");
}
}
case Intrinsic::aarch64_neon_smax:
return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umax:
return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_smin:
return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_sunpkhi:
return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sunpklo:
return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpkhi:
return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uunpklo:
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_clasta_n:
return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_clastb_n:
return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_lasta:
return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_lastb:
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn1:
return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_trn2:
return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp1:
return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_uzp2:
return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip1:
return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_zip2:
return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_splice:
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
return getPTrue(DAG, dl, Op.getValueType(),
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sme_cntsb:
return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
DAG.getConstant(1, dl, MVT::i32));
case Intrinsic::aarch64_sme_cntsh: {
SDValue One = DAG.getConstant(1, dl, MVT::i32);
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One);
return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One);
}
case Intrinsic::aarch64_sme_cntsw: {
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
DAG.getConstant(2, dl, MVT::i32));
}
case Intrinsic::aarch64_sme_cntsd: {
SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
DAG.getConstant(1, dl, MVT::i32));
return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
DAG.getConstant(3, dl, MVT::i32));
}
case Intrinsic::aarch64_sve_cnt: {
SDValue Data = Op.getOperand(3);
// CTPOP only supports integer operands.
if (Data.getValueType().isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Data, Op.getOperand(1));
}
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_convert_to_svbool:
return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintp:
return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintm:
return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinti:
return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintx:
return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frinta:
return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintn:
return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frintz:
return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_ucvtf:
return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_scvtf:
return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzu:
return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fcvtzs:
return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_fsqrt:
return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_frecpe_x:
return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_frecps_x:
return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_frsqrte_x:
return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_frsqrts_x:
return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_abs:
return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_neg:
return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}
case Intrinsic::aarch64_sve_rbit:
return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(1));
case Intrinsic::aarch64_sve_revb:
return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revh:
return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revw:
return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revd:
return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxth:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtw:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtb:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxth:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
Op.getOperand(1));
case Intrinsic::aarch64_sve_uxtw:
return DAG.getNode(
AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3),
DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
Op.getOperand(1));
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
unsigned Reg = RegInfo->getLocalAddressRegister(MF);
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
Op.getSimpleValueType());
}
case Intrinsic::eh_recoverfp: {
// FIXME: This needs to be implemented to correctly handle highly aligned
// stack objects. For now we simply return the incoming FP. Refer to D53541
// for more details.
SDValue FnOp = Op.getOperand(1);
SDValue IncomingFPOp = Op.getOperand(2);
GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
if (!Fn)
report_fatal_error(
"llvm.eh.recoverfp must take a function as the first argument");
return IncomingFPOp;
}
case Intrinsic::aarch64_neon_vsri:
case Intrinsic::aarch64_neon_vsli: {
EVT Ty = Op.getValueType();
if (!Ty.isVector())
report_fatal_error("Unexpected type for aarch64_neon_vsli");
assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
Op.getOperand(3));
}
case Intrinsic::aarch64_neon_srhadd:
case Intrinsic::aarch64_neon_urhadd:
case Intrinsic::aarch64_neon_shadd:
case Intrinsic::aarch64_neon_uhadd: {
bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_shadd);
bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
IntNo == Intrinsic::aarch64_neon_urhadd);
unsigned Opcode = IsSignedAdd
? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
: (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_sabd:
case Intrinsic::aarch64_neon_uabd: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
: ISD::ABDS;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
case Intrinsic::aarch64_neon_saddlp:
case Intrinsic::aarch64_neon_uaddlp: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
? AArch64ISD::UADDLP
: AArch64ISD::SADDLP;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
}
case Intrinsic::aarch64_neon_sdot:
case Intrinsic::aarch64_neon_udot:
case Intrinsic::aarch64_sve_sdot:
case Intrinsic::aarch64_sve_udot: {
unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
IntNo == Intrinsic::aarch64_sve_udot)
? AArch64ISD::UDOT
: AArch64ISD::SDOT;
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::get_active_lane_mask: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl, MVT::i64);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(), ID,
Op.getOperand(1), Op.getOperand(2));
}
}
}
bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
if (VT.getVectorElementType() == MVT::i8 ||
VT.getVectorElementType() == MVT::i16) {
EltTy = MVT::i32;
return true;
}
return false;
}
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
EVT DataVT) const {
// SVE only supports implicit extension of 32-bit indices.
if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
return false;
// Indices cannot be smaller than the main data type.
if (IndexVT.getScalarSizeInBits() < DataVT.getScalarSizeInBits())
return false;
// Scalable vectors with "vscale * 2" or fewer elements sit within a 64-bit
// element container type, which would violate the previous clause.
return DataVT.isFixedLengthVector() || DataVT.getVectorMinNumElements() > 2;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector() ||
useSVEForFixedLengthVectorVT(
ExtVal.getValueType(),
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
AArch64ISD::GLD1_SCALED_MERGE_ZERO},
{std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
};
auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
return AddrModes.find(Key)->second;
}
unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
switch (Opcode) {
default:
llvm_unreachable("unimplemented opcode");
return Opcode;
case AArch64ISD::GLD1_MERGE_ZERO:
return AArch64ISD::GLD1S_MERGE_ZERO;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
}
}
SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
SelectionDAG &DAG) const {
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
SDLoc DL(Op);
SDValue Chain = MGT->getChain();
SDValue PassThru = MGT->getPassThru();
SDValue Mask = MGT->getMask();
SDValue BasePtr = MGT->getBasePtr();
SDValue Index = MGT->getIndex();
SDValue Scale = MGT->getScale();
EVT VT = Op.getValueType();
EVT MemVT = MGT->getMemoryVT();
ISD::LoadExtType ExtType = MGT->getExtensionType();
ISD::MemIndexType IndexType = MGT->getIndexType();
// SVE supports zero (and so undef) passthrough values only; everything else
// must be handled manually by an explicit select on the load's output.
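// For example (illustrative): a gather whose passthrough is a non-zero splat
// is re-emitted below with an undef passthrough, and the final result is
// produced by select(Mask, Load, PassThru).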
if (!PassThru->isUndef() && !isZerosVector(PassThru.getNode())) {
SDValue Ops[] = {Chain, DAG.getUNDEF(VT), Mask, BasePtr, Index, Scale};
SDValue Load =
DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
MGT->getMemOperand(), IndexType, ExtType);
SDValue Select = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Select, Load.getValue(1)}, DL);
}
bool IsScaled = MGT->isIndexScaled();
bool IsSigned = MGT->isIndexSigned();
// SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
// must be calculated beforehand.
uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
EVT IndexVT = Index.getValueType();
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(MGT->getVTList(), MemVT, DL, Ops,
MGT->getMemOperand(), IndexType, ExtType);
}
// Lower fixed length gather to a scalable equivalent.
if (VT.isFixedLengthVector()) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors!");
// NOTE: Handle floating-point as if integer then bitcast the result.
EVT DataVT = VT.changeVectorElementTypeToInteger();
MemVT = MemVT.changeVectorElementTypeToInteger();
// Find the smallest integer fixed length vector we can use for the gather.
EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
if (DataVT.getVectorElementType() == MVT::i64 ||
Index.getValueType().getVectorElementType() == MVT::i64 ||
Mask.getValueType().getVectorElementType() == MVT::i64)
PromotedVT = VT.changeVectorElementType(MVT::i64);
// Promote vector operands except for passthrough, which we know is either
// undef or zero, and thus best constructed directly.
unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
// A promoted result type forces the need for an extending load.
if (PromotedVT != DataVT && ExtType == ISD::NON_EXTLOAD)
ExtType = ISD::EXTLOAD;
EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
// Convert fixed length vector operands to scalable.
MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
Index = convertToScalableVector(DAG, ContainerVT, Index);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
PassThru = PassThru->isUndef() ? DAG.getUNDEF(ContainerVT)
: DAG.getConstant(0, DL, ContainerVT);
// Emit equivalent scalable vector gather.
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
SDValue Load =
DAG.getMaskedGather(DAG.getVTList(ContainerVT, MVT::Other), MemVT, DL,
Ops, MGT->getMemOperand(), IndexType, ExtType);
// Extract fixed length data then convert to the required result type.
SDValue Result = convertFromScalableVector(DAG, PromotedVT, Load);
Result = DAG.getNode(ISD::TRUNCATE, DL, DataVT, Result);
if (VT.isFloatingPoint())
Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Everything else is legal.
return Op;
}
SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
SelectionDAG &DAG) const {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
SDLoc DL(Op);
SDValue Chain = MSC->getChain();
SDValue StoreVal = MSC->getValue();
SDValue Mask = MSC->getMask();
SDValue BasePtr = MSC->getBasePtr();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
EVT VT = StoreVal.getValueType();
EVT MemVT = MSC->getMemoryVT();
ISD::MemIndexType IndexType = MSC->getIndexType();
bool Truncating = MSC->isTruncatingStore();
bool IsScaled = MSC->isIndexScaled();
bool IsSigned = MSC->isIndexSigned();
// SVE supports an index scaled by sizeof(MemVT.elt) only; everything else
// must be calculated beforehand.
uint64_t ScaleVal = cast<ConstantSDNode>(Scale)->getZExtValue();
if (IsScaled && ScaleVal != MemVT.getScalarStoreSize()) {
assert(isPowerOf2_64(ScaleVal) && "Expecting power-of-two types");
EVT IndexVT = Index.getValueType();
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index,
DAG.getConstant(Log2_32(ScaleVal), DL, IndexVT));
Scale = DAG.getTargetConstant(1, DL, Scale.getValueType());
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
MSC->getMemOperand(), IndexType, Truncating);
}
// Lower fixed length scatter to a scalable equivalent.
if (VT.isFixedLengthVector()) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors!");
// Once bitcast we treat floating-point scatters as if integer.
if (VT.isFloatingPoint()) {
VT = VT.changeVectorElementTypeToInteger();
MemVT = MemVT.changeVectorElementTypeToInteger();
StoreVal = DAG.getNode(ISD::BITCAST, DL, VT, StoreVal);
}
// Find the smallest integer fixed length vector we can use for the scatter.
EVT PromotedVT = VT.changeVectorElementType(MVT::i32);
if (VT.getVectorElementType() == MVT::i64 ||
Index.getValueType().getVectorElementType() == MVT::i64 ||
Mask.getValueType().getVectorElementType() == MVT::i64)
PromotedVT = VT.changeVectorElementType(MVT::i64);
// Promote vector operands.
unsigned ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
Index = DAG.getNode(ExtOpcode, DL, PromotedVT, Index);
Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, PromotedVT, Mask);
StoreVal = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, StoreVal);
// A promoted value type forces the need for a truncating store.
if (PromotedVT != VT)
Truncating = true;
EVT ContainerVT = getContainerForFixedLengthVector(DAG, PromotedVT);
// Convert fixed length vector operands to scalable.
MemVT = ContainerVT.changeVectorElementType(MemVT.getVectorElementType());
Index = convertToScalableVector(DAG, ContainerVT, Index);
Mask = convertFixedMaskToScalableVector(Mask, DAG);
StoreVal = convertToScalableVector(DAG, ContainerVT, StoreVal);
// Emit equivalent scalable vector scatter.
SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(MSC->getVTList(), MemVT, DL, Ops,
MSC->getMemOperand(), IndexType, Truncating);
}
// Everything else is legal.
return Op;
}
SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a masked load node");
EVT VT = Op->getValueType(0);
if (useSVEForFixedLengthVectorVT(
VT,
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
SDValue PassThru = LoadNode->getPassThru();
SDValue Mask = LoadNode->getMask();
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
return Op;
SDValue Load = DAG.getMaskedLoad(
VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
LoadNode->getExtensionType());
SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
}
// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
EVT VT, EVT MemVT,
SelectionDAG &DAG) {
assert(VT.isVector() && "VT should be a vector type");
assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
SDValue Value = ST->getValue();
// First extend the promoted v4i16 to v8i16, truncate it to v8i8, and extract
// the word lane that represents the v4i8 subvector. This optimizes the store
// to:
//
// xtn v0.8b, v0.8h
// str s0, [x0]
SDValue Undef = DAG.getUNDEF(MVT::i16);
SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
{Undef, Undef, Undef, Undef});
SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
Value, UndefVec);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Trunc, DAG.getConstant(0, DL, MVT::i64));
return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
ST->getBasePtr(), ST->getMemOperand());
}
// Custom lowering for any store, vector or scalar, normal or truncating.
// Currently we only custom lower truncating stores from v4i16 to v4i8 and
// volatile stores of i128.
SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc Dl(Op);
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
assert(StoreNode && "Can only custom lower store nodes");
SDValue Value = StoreNode->getValue();
EVT VT = Value.getValueType();
EVT MemVT = StoreNode->getMemoryVT();
if (VT.isVector()) {
if (useSVEForFixedLengthVectorVT(
VT,
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
unsigned AS = StoreNode->getAddressSpace();
Align Alignment = StoreNode->getAlign();
if (Alignment < MemVT.getStoreSize() &&
!allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
StoreNode->getMemOperand()->getFlags(),
nullptr)) {
return scalarizeVectorStore(StoreNode, DAG);
}
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
MemVT == MVT::v4i8) {
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
}
// 256-bit non-temporal stores can be lowered to STNP. Do this as part of
// the custom lowering, as there are no unpaired non-temporal stores and
// legalization will break up 256-bit inputs.
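// For example (illustrative): a non-temporal store of v8i32 is split into
// two v4i32 halves below and emitted as a single "stnp q0, q1, [x0]".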
ElementCount EC = MemVT.getVectorElementCount();
if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
EC.isKnownEven() &&
((MemVT.getScalarSizeInBits() == 8u ||
MemVT.getScalarSizeInBits() == 16u ||
MemVT.getScalarSizeInBits() == 32u ||
MemVT.getScalarSizeInBits() == 64u))) {
SDValue Lo =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
SDValue Hi =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
StoreNode->getValue(),
DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
return LowerStore128(Op, DAG);
} else if (MemVT == MVT::i64x8) {
SDValue Value = StoreNode->getValue();
assert(Value->getValueType(0) == MVT::i64x8);
SDValue Chain = StoreNode->getChain();
SDValue Base = StoreNode->getBasePtr();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
Value, DAG.getConstant(i, Dl, MVT::i32));
SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
DAG.getConstant(i * 8, Dl, PtrVT));
Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
StoreNode->getOriginalAlign());
}
return Chain;
}
return SDValue();
}
/// Lower atomic or volatile 128-bit stores to a single STP instruction.
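/// For example (illustrative): a volatile or monotonic atomic store of an
/// i128 held in x2:x3 to the address in x0 becomes roughly
/// "stp x2, x3, [x0]".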
SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
SelectionDAG &DAG) const {
MemSDNode *StoreNode = cast<MemSDNode>(Op);
assert(StoreNode->getMemoryVT() == MVT::i128);
assert(StoreNode->isVolatile() || StoreNode->isAtomic());
assert(!StoreNode->isAtomic() ||
StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
SDValue Value = StoreNode->getOpcode() == ISD::STORE
? StoreNode->getOperand(1)
: StoreNode->getOperand(2);
SDLoc DL(Op);
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(0, DL, MVT::i64));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(1, DL, MVT::i64));
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
if (LoadNode->getMemoryVT() == MVT::i64x8) {
SmallVector<SDValue, 8> Ops;
SDValue Base = LoadNode->getBasePtr();
SDValue Chain = LoadNode->getChain();
EVT PtrVT = Base.getValueType();
for (unsigned i = 0; i < 8; i++) {
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
DAG.getConstant(i * 8, DL, PtrVT));
SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
LoadNode->getPointerInfo(),
LoadNode->getOriginalAlign());
Ops.push_back(Part);
Chain = SDValue(Part.getNode(), 1);
}
SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
return DAG.getMergeValues({Loaded, Chain}, DL);
}
// Custom lowering for extending v4i8 vector loads.
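// A sketch of the emitted sequence, assuming zero extension (illustrative):
// "ldr s0, [x0]" then "ushll v0.8h, v0.8b, #0" (sshll when sign extending),
// after which the low v4i16 half is extracted and, for v4i32 results,
// extended once more.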
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
if (LoadNode->getMemoryVT() != MVT::v4i8)
return SDValue();
unsigned ExtType;
if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
ExtType = ISD::SIGN_EXTEND;
else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
LoadNode->getExtensionType() == ISD::EXTLOAD)
ExtType = ISD::ZERO_EXTEND;
else
return SDValue();
SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
LoadNode->getBasePtr(), MachinePointerInfo());
SDValue Chain = Load.getValue(1);
SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
DAG.getConstant(0, DL, MVT::i64));
if (VT == MVT::v4i32)
Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
return DAG.getMergeValues({Ext, Chain}, DL);
}
// Generate SUBS and CSEL for integer abs.
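// For scalar types this emits, roughly (illustrative): a SUBS of the value
// against zero to set the flags, then a CSEL that keeps the original value
// when the PL (non-negative) condition holds and the negation otherwise.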
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
if (VT.isVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(0));
// Generate SUBS & CSEL.
SDValue Cmp =
DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
Op.getOperand(0), DAG.getConstant(0, DL, VT));
return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
Cmp.getValue(1));
}
static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
SDValue Dest = Op.getOperand(2);
AArch64CC::CondCode CC;
if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
SDLoc dl(Op);
SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Custom lowering: ");
LLVM_DEBUG(Op.dump());
switch (Op.getOpcode()) {
default:
llvm_unreachable("unimplemented operand");
return SDValue();
case ISD::BITCAST:
return LowerBITCAST(Op, DAG);
case ISD::GlobalAddress:
return LowerGlobalAddress(Op, DAG);
case ISD::GlobalTLSAddress:
return LowerGlobalTLSAddress(Op, DAG);
case ISD::SETCC:
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS:
return LowerSETCC(Op, DAG);
case ISD::BRCOND:
return LowerBRCOND(Op, DAG);
case ISD::BR_CC:
return LowerBR_CC(Op, DAG);
case ISD::SELECT:
return LowerSELECT(Op, DAG);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
case ISD::BR_JT:
return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
return LowerBlockAddress(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
case ISD::VACOPY:
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
case ISD::ADDCARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
case ISD::SUBCARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
case ISD::SADDO_CARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
case ISD::SSUBO_CARRY:
return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SMULO:
case ISD::UMULO:
return LowerXALUO(Op, DAG);
case ISD::FADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
case ISD::FSUB:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
case ISD::FMUL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
case ISD::FMA:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
case ISD::FDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
case ISD::FNEG:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
case ISD::FCEIL:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
case ISD::FFLOOR:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
case ISD::FNEARBYINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
case ISD::FRINT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
case ISD::FROUND:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
case ISD::FROUNDEVEN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
case ISD::FTRUNC:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
case ISD::FSQRT:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
case ISD::FABS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND:
return LowerFP_EXTEND(Op, DAG);
case ISD::FRAMEADDR:
return LowerFRAMEADDR(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
case ISD::RETURNADDR:
return LowerRETURNADDR(Op, DAG);
case ISD::ADDROFRETURNADDR:
return LowerADDROFRETURNADDR(Op, DAG);
case ISD::CONCAT_VECTORS:
return LowerCONCAT_VECTORS(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SPLAT_VECTOR:
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
case ISD::UMIN:
case ISD::SMAX:
case ISD::UMAX:
return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
return LowerVectorSRA_SRL_SHL(Op, DAG);
case ISD::SHL_PARTS:
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
return LowerShiftParts(Op, DAG);
case ISD::CTPOP:
case ISD::PARITY:
return LowerCTPOP_PARITY(Op, DAG);
case ISD::FCOPYSIGN:
return LowerFCOPYSIGN(Op, DAG);
case ISD::OR:
return LowerVectorOR(Op, DAG);
case ISD::XOR:
return LowerXOR(Op, DAG);
case ISD::PREFETCH:
return LowerPREFETCH(Op, DAG);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return LowerFP_TO_INT_SAT(Op, DAG);
case ISD::FSINCOS:
return LowerFSINCOS(Op, DAG);
case ISD::FLT_ROUNDS_:
return LowerFLT_ROUNDS_(Op, DAG);
case ISD::SET_ROUNDING:
return LowerSET_ROUNDING(Op, DAG);
case ISD::MUL:
return LowerMUL(Op, DAG);
case ISD::MULHS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED);
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::ATOMIC_STORE:
if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
assert(Subtarget->hasLSE2());
return LowerStore128(Op, DAG);
}
return SDValue();
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
case ISD::MGATHER:
return LowerMGATHER(Op, DAG);
case ISD::MSCATTER:
return LowerMSCATTER(Op, DAG);
case ISD::VECREDUCE_SEQ_FADD:
return LowerVECREDUCE_SEQ_FADD(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VSCALE:
return LowerVSCALE(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
case ISD::SIGN_EXTEND_INREG: {
// Only custom lower when ExtraVT has a legal byte based element type.
EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
EVT ExtraEltVT = ExtraVT.getVectorElementType();
if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
(ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
return SDValue();
return LowerToPredicatedOp(Op, DAG,
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
}
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
case ISD::AND:
case ISD::SUB:
return LowerToScalableOp(Op, DAG);
case ISD::FMAXIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
case ISD::FMAXNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
case ISD::FMINIMUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
case ISD::FMINNUM:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
case ISD::VSELECT:
return LowerFixedLengthVectorSelectToSVE(Op, DAG);
case ISD::ABS:
return LowerABS(Op, DAG);
case ISD::ABDS:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
case ISD::CTLZ:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU);
case ISD::CTTZ:
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LRINT:
case ISD::STRICT_LLRINT: {
assert(Op.getOperand(1).getValueType() == MVT::f16 &&
"Expected custom lowering of rounding operations only for f16");
SDLoc DL(Op);
SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
{Op.getOperand(0), Op.getOperand(1)});
return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
{Ext.getValue(1), Ext.getValue(0)});
}
}
}
bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!VT.isFixedLengthVector() || !VT.isSimple())
return false;
// Don't use SVE for vectors we cannot scalarize if required.
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
// Fixed-length predicates should be promoted to i8.
// NOTE: This is consistent with how NEON (and thus 64/128-bit vectors) work.
case MVT::i1:
default:
return false;
case MVT::i8:
case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::f16:
case MVT::f32:
case MVT::f64:
break;
}
// All SVE implementations support NEON sized vectors.
if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
return Subtarget->hasSVE();
// Ensure NEON MVTs only belong to a single register class.
if (VT.getFixedSizeInBits() <= 128)
return false;
// Ensure wider than NEON code generation is enabled.
if (!Subtarget->useSVEForFixedLengthVectors())
return false;
// Don't use SVE for types that don't fit.
if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
return false;
// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length SVE support in place.
if (!VT.isPow2VectorType())
return false;
return true;
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
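/// Return the intrinsic ID carried by an ISD::INTRINSIC_WO_CHAIN node, or
/// Intrinsic::not_intrinsic for any other node.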
static unsigned getIntrinsicID(const SDNode *N) {
unsigned Opcode = N->getOpcode();
switch (Opcode) {
default:
return Intrinsic::not_intrinsic;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return IID;
return Intrinsic::not_intrinsic;
}
}
}
bool AArch64TargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const {
if (!N0.hasOneUse())
return false;
unsigned IID = getIntrinsicID(N1.getNode());
// Avoid reassociating expressions that can be lowered to smlal/umlal.
if (IID == Intrinsic::aarch64_neon_umull ||
N1.getOpcode() == AArch64ISD::UMULL ||
IID == Intrinsic::aarch64_neon_smull ||
N1.getOpcode() == AArch64ISD::SMULL)
return N0.getOpcode() != ISD::ADD;
return true;
}
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
default:
report_fatal_error("Unsupported calling convention.");
case CallingConv::WebKit_JS:
return CC_AArch64_WebKit_JS;
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
if (Subtarget->isTargetWindows() && IsVarArg)
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
return CC_AArch64_DarwinPCS;
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
case CallingConv::AArch64_SVE_VectorCall:
return CC_AArch64_AAPCS;
}
}
CCAssignFn *
AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
: RetCC_AArch64_AAPCS;
}
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
SmallVector<ISD::OutputArg, 4> Outs;
GetReturnInfo(CallConv, F.getReturnType(), F.getAttributes(), Outs,
DAG.getTargetLoweringInfo(), MF.getDataLayout());
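// If any value is returned in an SVE (scalable vector) register, record that
// so later logic treats this function as using the SVE calling convention.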
if (any_of(Outs, [](ISD::OutputArg &Out){ return Out.VT.isScalableVector(); }))
FuncInfo->setIsSVECC(true);
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
// i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
// Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
// we use a special version of AnalyzeFormalArguments to pass in ValVT and
// LocVT.
unsigned NumArgs = Ins.size();
Function::const_arg_iterator CurOrigArg = F.arg_begin();
unsigned CurArgIdx = 0;
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ValVT = Ins[i].VT;
if (Ins[i].isOrigArg()) {
std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
CurArgIdx = Ins[i].getOrigArgIndex();
// Get type of the original argument.
EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ValVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ValVT = MVT::i16;
}
bool UseVarArgCC = false;
if (IsWin64)
UseVarArgCC = isVarArg;
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
bool Res =
AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
- SmallVector<SDValue, 16> ArgValues;
+
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
if (Ins[i].Flags.isByVal()) {
// Byval is used for HFAs in the PCS, but the system should work in a
// non-compliant manner for larger structs.
EVT PtrVT = getPointerTy(DAG.getDataLayout());
int Size = Ins[i].Flags.getByValSize();
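// Round the byval size up to a whole number of 8-byte slots.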
unsigned NumRegs = (Size + 7) / 8;
// FIXME: This works on big-endian for composite byvals, which are the common
// case. It should also work for fundamental types.
unsigned FrameIdx =
MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
continue;
}
if (Ins[i].Flags.isSwiftAsync())
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = &AArch64::GPR32RegClass;
else if (RegVT == MVT::i64)
RC = &AArch64::GPR64RegClass;
else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
RC = &AArch64::FPR16RegClass;
else if (RegVT == MVT::f32)
RC = &AArch64::FPR32RegClass;
else if (RegVT == MVT::f64 || RegVT.is64BitVector())
RC = &AArch64::FPR64RegClass;
else if (RegVT == MVT::f128 || RegVT.is128BitVector())
RC = &AArch64::FPR128RegClass;
else if (RegVT.isScalableVector() &&
RegVT.getVectorElementType() == MVT::i1) {
FuncInfo->setIsSVECC(true);
RC = &AArch64::PPRRegClass;
} else if (RegVT.isScalableVector()) {
FuncInfo->setIsSVECC(true);
RC = &AArch64::ZPRRegClass;
} else
llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
// Transform the arguments in physical registers into virtual ones.
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
// If this is an 8, 16 or 32-bit value, it is really passed promoted
// to 64 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
break;
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
break;
case CCValAssign::AExtUpper:
ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
DAG.getConstant(32, DL, RegVT));
ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
? VA.getLocVT().getSizeInBits()
: VA.getValVT().getSizeInBits()) / 8;
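// On big-endian targets, arguments smaller than 8 bytes occupy the
// high-address end of their slot, so bias the load by (8 - size) bytes.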
uint32_t BEAlign = 0;
if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
// For a NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue =
DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI), MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
if (Ins[i].Flags.isInConsecutiveRegs()) {
assert(!Ins[i].Flags.isInConsecutiveRegsLast());
while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
}
MVT PartLoad = VA.getValVT();
SDValue Ptr = ArgValue;
// Ensure we generate all loads for each tuple part, whilst updating the
// pointer after each load correctly using vscale.
while (NumParts > 0) {
ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
} else {
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
// i1 arguments are zero-extended to i8 by the caller. Emit a
// hint to reflect this.
if (Ins[i].isOrigArg()) {
Argument *OrigArg = F.getArg(Ins[i].getOrigArgIndex());
if (OrigArg->getType()->isIntegerTy(1)) {
if (!Ins[i].Flags.isZExt()) {
ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
ArgValue.getValueType(), ArgValue);
}
}
}
InVals.push_back(ArgValue);
}
}
assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
// varargs
if (isVarArg) {
if (!Subtarget->isTargetDarwin() || IsWin64) {
// The AAPCS variadic function ABI is identical to the non-variadic
// one. As a result there may be more arguments in registers and we should
// save them for future reference.
// Win64 variadic functions also pass arguments in registers, but all float
// arguments are passed in integer registers.
saveVarArgRegisters(CCInfo, DAG, DL, Chain);
}
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
SmallVector<MVT, 2> RegParmTypes;
RegParmTypes.push_back(MVT::i64);
RegParmTypes.push_back(MVT::f128);
// Compute the set of forwarded registers. The rest are scratch.
SmallVectorImpl<ForwardedRegister> &Forwards =
FuncInfo->getForwardedMustTailRegParms();
CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
CC_AArch64_AAPCS);
// Conservatively forward X8, since it might be used for aggregate return.
if (!CCInfo.isAllocated(AArch64::X8)) {
Register X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
}
}
}
// On Windows, InReg pointers must be returned, so record the pointer in a
// virtual register at the start of the function so it can be returned in the
// epilogue.
if (IsWin64) {
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
if (Ins[I].Flags.isInReg()) {
assert(!FuncInfo->getSRetReturnReg());
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Register Reg =
MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
break;
}
}
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
// use of the stack area to be popped, which must be aligned to 16 bytes in
// any case:
StackArgSize = alignTo(StackArgSize, 16);
// If we're expected to restore the stack (e.g. fastcc) then we'll be adding
// a multiple of 16.
FuncInfo->setArgumentStackToRestore(StackArgSize);
// This realignment carries over to the available bytes below. Our own
// callers will guarantee the space is free by giving an aligned value to
// CALLSEQ_START.
}
// Even if we're not expected to free up the space, it's useful to know how
// much is there while considering tail calls (because we can reuse it).
FuncInfo->setBytesInStackArgArea(StackArgSize);
if (Subtarget->hasCustomCallingConv())
Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
return Chain;
}
void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SelectionDAG &DAG,
const SDLoc &DL,
SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
SmallVector<SDValue, 8> MemOps;
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
if (IsWin64) {
GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
if (GPRSaveSize & 15)
// The extra size here, if triggered, will always be 8.
MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
SDValue Store =
DAG.getStore(Val.getValue(1), DL, Val, FIN,
IsWin64 ? MachinePointerInfo::getFixedStack(
MF, GPRIdx, (i - FirstVariadicGPR) * 8)
: MachinePointerInfo::getStack(MF, i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
}
}
FuncInfo->setVarArgsGPRIndex(GPRIdx);
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
if (Subtarget->hasFPARMv8() && !IsWin64) {
static const MCPhysReg FPRArgRegs[] = {
AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
MachinePointerInfo::getStack(MF, i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
}
}
FuncInfo->setVarArgsFPRIndex(FPRIdx);
FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
if (!MemOps.empty()) {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
- CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC);
-
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
continue;
}
// Avoid copying a physreg twice since RegAllocFast is incompetent and only
// allows one use of a physreg per block.
SDValue Val = CopiedRegs.lookup(VA.getLocReg());
if (!Val) {
Val =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
CopiedRegs[VA.getLocReg()] = Val;
}
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExtUpper:
Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
DAG.getConstant(32, DL, VA.getLocVT()));
LLVM_FALLTHROUGH;
case CCValAssign::AExt:
LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
break;
}
InVals.push_back(Val);
}
return Chain;
}
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
case CallingConv::Fast:
return true;
default:
return false;
}
}
static void analyzeCallOperands(const AArch64TargetLowering &TLI,
const AArch64Subtarget *Subtarget,
const TargetLowering::CallLoweringInfo &CLI,
CCState &CCInfo) {
const SelectionDAG &DAG = CLI.DAG;
CallingConv::ID CalleeCC = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool UseVarArgCC = false;
if (IsVarArg) {
// On Windows, the fixed arguments in a vararg call are passed in GPRs
// too, so use the vararg CC to force them to integer registers.
if (IsCalleeWin64) {
UseVarArgCC = true;
} else {
UseVarArgCC = !Outs[i].IsFixed;
}
} else {
// Get type of the original argument.
EVT ActualVT =
TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty,
/*AllowUnknown*/ true);
MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT;
// If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
ArgVT = MVT::i8;
else if (ActualMVT == MVT::i16)
ArgVT = MVT::i16;
}
CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
assert(!Res && "Call operand has unhandled type");
(void)Res;
}
}
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
const CallLoweringInfo &CLI) const {
CallingConv::ID CalleeCC = CLI.CallConv;
if (!mayTailCallThisCC(CalleeCC))
return false;
SDValue Callee = CLI.Callee;
bool IsVarArg = CLI.IsVarArg;
const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
const SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
const SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
const SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
// Functions using the C or Fast calling convention that have an SVE signature
// preserve more registers and should assume the SVE_VectorCall CC.
// The check for matching callee-saved regs will determine whether it is
// eligible for TCO.
if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
MF.getInfo<AArch64FunctionInfo>()->isSVECC())
CallerCC = CallingConv::AArch64_SVE_VectorCall;
bool CCMatch = CallerCC == CalleeCC;
// When using the Windows calling convention on a non-windows OS, we want
// to back up and restore X18 in such functions; we can't do a tail call
// from those functions.
if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
CalleeCC != CallingConv::Win64)
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF.arg_begin(),
e = CallerF.arg_end();
i != e; ++i) {
if (i->hasByValAttr())
return false;
// On Windows, "inreg" attributes signify non-aggregate indirect returns.
// In this case, it is necessary to save/restore X0 in the callee. Tail
// call opt interferes with this. So we disable tail call opt when the
// caller has an argument with "inreg" attribute.
// FIXME: Check whether the callee also has an "inreg" argument.
if (i->hasInRegAttr())
return false;
}
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
// pre-emption of symbols, as the AAELF spec requires normal calls
// to undefined weak functions to be replaced with a NOP or jump to the
// next instruction. The behaviour of branch instructions in this
// situation (as used for tail calls) is implementation-defined, so we
// cannot rely on the linker replacing the tail call with a return.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
// Now we search for cases where we can use a tail call without changing the
// ABI. Sibcall is used in some places (particularly gcc) to refer to this
// concept.
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
assert((!IsVarArg || CalleeCC == CallingConv::C) &&
"Unexpected variadic calling convention");
LLVMContext &C = *DAG.getContext();
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
CCAssignFnForCall(CalleeCC, IsVarArg),
CCAssignFnForCall(CallerCC, IsVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (Subtarget->hasCustomCallingConv()) {
TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
}
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) {
// When the call is musttail, additional checks have already been done, so we
// can safely skip this check.
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
for (const CCValAssign &ArgLoc : ArgLocs)
if (!ArgLoc.isRegLoc())
return false;
}
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If any of the arguments is passed indirectly, it must be SVE, so the
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we check this explicitly here:
// if any argument is passed indirectly, the call cannot be a tail call.
if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
A.getValVT().isScalableVector()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
return false;
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
return true;
}
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
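// Compute the inclusive byte range [FirstByte, LastByte] covered by the
// clobbered frame object.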
int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
// CALLSEQ_BEGIN node.
ArgChains.push_back(Chain);
// Add a chain value for each stack-argument load that overlaps the
// clobbered range.
for (SDNode *U : DAG.getEntryNode().getNode()->uses())
if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
ArgChains.push_back(SDValue(L, 1));
}
// Build a tokenfactor for all the chains.
return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
bool TailCallOpt) const {
return (CallCC == CallingConv::Fast && TailCallOpt) ||
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
// Check if the value is zero-extended from i1 to i8
static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
unsigned SizeInBits = Arg.getValueType().getSizeInBits();
if (SizeInBits < 8)
return false;
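// Bits 1-7 of the low byte (mask 0xFE) must be known zero; higher bits are
// irrelevant because the AAPCS only requires the low 8 bits to contain the
// zero-extended i1. The known-bits query is depth-limited to stay cheap.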
APInt RequiredZero(SizeInBits, 0xFE);
KnownBits Bits = DAG.computeKnownBits(Arg, 4);
bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
return ZExtBool;
}
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID &CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
MachineFunction::CallSiteInfo CSInfo;
bool IsThisReturn = false;
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
bool IsSibCall = false;
bool GuardWithBTI = false;
if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
!Subtarget->noBTIAtReturnTwice()) {
GuardWithBTI = FuncInfo->branchTargetEnforcement();
}
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ if (IsVarArg) {
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
+ report_fatal_error("Passing SVE types to variadic functions is "
+ "currently not supported");
+ }
+ }
+
+ analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
+
+ CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RetCCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+ RetCCInfo.AnalyzeCallResult(Ins, RetCC);
+
// Check callee args/returns for SVE registers and set calling convention
// accordingly.
if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
- bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
- return Out.VT.isScalableVector();
- });
- bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
- return In.VT.isScalableVector();
- });
-
- if (CalleeInSVE || CalleeOutSVE)
+ auto HasSVERegLoc = [](CCValAssign &Loc) {
+ if (!Loc.isRegLoc())
+ return false;
+ return AArch64::ZPRRegClass.contains(Loc.getLocReg()) ||
+ AArch64::PPRRegClass.contains(Loc.getLocReg());
+ };
+ if (any_of(RVLocs, HasSVERegLoc) || any_of(ArgLocs, HasSVERegLoc))
CallConv = CallingConv::AArch64_SVE_VectorCall;
}
if (IsTailCall) {
// Check if it's really possible to do a tail call.
IsTailCall = isEligibleForTailCallOptimization(CLI);
// A sibling call is one where we're under the usual C ABI and not planning
// to change that but can still do a tail call:
if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
CallConv != CallingConv::SwiftTail)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-
- if (IsVarArg) {
- unsigned NumArgs = Outs.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector())
- report_fatal_error("Passing SVE types to variadic functions is "
- "currently not supported");
- }
- }
-
- analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
-
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int FPDiff = 0;
if (IsTailCall && !IsSibCall) {
unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
NumBytes = alignTo(NumBytes, 16);
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
// can actually shrink the stack.
FPDiff = NumReusableBytes - NumBytes;
// Update the required reserved area if this is the tail call requiring the
// most argument stack space.
if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
FuncInfo->setTailCallReservedStack(-FPDiff);
// The stack pointer must be 16-byte aligned at all times it's used for a
// memory operation, which in practice means at *all* times and in
// particular across call boundaries. Therefore our own arguments started at
// a 16-byte aligned SP and the delta applied for the tail call should
// satisfy the same constraint.
assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall)
Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
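// For a musttail call in a variadic function, forward the incoming register
// arguments that were captured in LowerFormalArguments on to the callee.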
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
RegsToPass.emplace_back(F.PReg, Val);
}
}
// Walk the register/memloc assignments, inserting copies/loads.
unsigned ExtraArgLocs = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8-bits by the caller.
//
// Check if we actually have to do this, because the value may
// already be zero-extended.
//
// We cannot just emit a (zext i8 (trunc (assert-zext i8)))
// and rely on DAGCombiner to fold this, because the following
// (anyext i32) is combined with (zext i8) in DAG.getNode:
//
// (ext (zext x)) -> (zext x)
//
// This will give us (zext i32), which we cannot remove, so
// try to check this beforehand.
if (!checkZExtBool(Arg, DAG)) {
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
}
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
case CCValAssign::BCvt:
Arg = DAG.getBitcast(VA.getLocVT(), Arg);
break;
case CCValAssign::Trunc:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
assert(VA.getValVT().isScalableVector() &&
"Only scalable vectors can be passed indirectly");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
unsigned NumParts = 1;
if (Outs[i].Flags.isInConsecutiveRegs()) {
assert(!Outs[i].Flags.isInConsecutiveRegsLast());
while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
++NumParts;
StoreSize *= NumParts;
}
MachineFrameInfo &MFI = MF.getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
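// Mark the slot as a scalable-vector stack object so frame lowering
// addresses it with vscale-scaled offsets.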
MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
// Ensure we generate all stores for each tuple part, whilst updating the
// pointer after each store correctly using vscale.
while (NumParts) {
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
SDValue BytesIncrement = DAG.getVScale(
DL, Ptr.getValueType(),
APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
MPI = MachinePointerInfo(MPI.getAddrSpace());
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
BytesIncrement, Flags);
ExtraArgLocs++;
i++;
}
}
Arg = SpillSlot;
break;
}
if (VA.isRegLoc()) {
if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
Outs[0].VT == MVT::i64) {
assert(VA.getLocVT() == MVT::i64 &&
"unexpected calling convention register assignment");
assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
"unexpected use of 'returned'");
IsThisReturn = true;
}
if (RegsUsed.count(VA.getLocReg())) {
// If this register has already been used then we're trying to pack
// parts of an [N x i32] into an X-register. The extension type will
// take care of putting the two halves in the right place but we have to
// combine them.
SDValue &Bits =
llvm::find_if(RegsToPass,
[=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})
->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
// Call site info is used for function's parameter entry value
// tracking. For now we track only simple cases when parameter
// is transferred through whole register.
llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
return ArgReg.Reg == VA.getLocReg();
});
} else {
RegsToPass.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.EmitCallSiteInfo)
CSInfo.emplace_back(VA.getLocReg(), i);
}
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
// FIXME: This works on big-endian for composite byvals, which are the
// common case. It should also work for fundamental types.
uint32_t BEAlign = 0;
unsigned OpSize;
if (VA.getLocInfo() == CCValAssign::Indirect ||
VA.getLocInfo() == CCValAssign::Trunc)
OpSize = VA.getLocVT().getFixedSizeInBits();
else
OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
: VA.getValVT().getSizeInBits();
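// Convert the size from bits to bytes, rounding up.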
OpSize = (OpSize + 7) / 8;
if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
!Flags.isInConsecutiveRegs()) {
if (OpSize < 8)
BEAlign = 8 - OpSize;
}
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset + BEAlign;
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
if (IsTailCall) {
Offset = Offset + FPDiff;
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
} else {
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
SDValue Cpy = DAG.getMemcpy(
Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ false,
/*isTailCall = */ false, DstInfo, MachinePointerInfo());
MemOpChains.push_back(Cpy);
} else {
// Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
// promoted to a legal register type i32, we should truncate Arg back to
// i1/i8/i16.
if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
VA.getValVT() == MVT::i16)
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
auto GV = G->getGlobal();
unsigned OpFlags =
Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
if (OpFlags & AArch64II::MO_GOT) {
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const GlobalValue *GV = G->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
}
} else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Subtarget->isTargetMachO()) {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
} else {
const char *Sym = S->getSymbol();
Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
}
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass)
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const uint32_t *Mask;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
if (IsThisReturn) {
// For 'this' returns, use the X0-preserving mask if applicable
Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
if (!Mask) {
IsThisReturn = false;
Mask = TRI->getCallPreservedMask(MF, CallConv);
}
} else
Mask = TRI->getCallPreservedMask(MF, CallConv);
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(MF, &Mask);
if (TRI->isAnyArgRegReserved(MF))
TRI->emitReservedArgRegCallError(MF);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
// be expanded to the call, directly followed by a special marker sequence and
// a call to an ObjC library function. Use CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
// Add a target global address for the retainRV/claimRV runtime function
// just before the call target.
Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
Ops.insert(Ops.begin() + 1, GA);
} else if (GuardWithBTI)
CallOpc = AArch64ISD::CALL_BTI;
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
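// Callee-pop conventions (see DoesCalleeRestoreStack) pop a 16-byte-aligned
// amount; record it so CALLSEQ_END matches what the callee will actually pop.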
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
DAG.getIntPtrConstant(CalleePopBytes, DL, true),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, RVLocs, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
bool AArch64TargetLowering::CanLowerReturn(
CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, RetCC);
}
SDValue
AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
auto &MF = DAG.getMachineFunction();
auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
SDValue Flag;
SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to i8 by the producer of the
// value. This is strictly redundant on Darwin (which uses "zeroext
// i1"), but will be optimised out before ISel.
Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
}
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
case CCValAssign::ZExt:
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::AExtUpper:
assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, VA.getLocVT()));
break;
}
if (RegsUsed.count(VA.getLocReg())) {
SDValue &Bits =
llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
return Elt.first == VA.getLocReg();
})->second;
Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
} else {
RetVals.emplace_back(VA.getLocReg(), Arg);
RegsUsed.insert(VA.getLocReg());
}
}
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// The Windows AArch64 ABI requires that when returning a struct by value we
// copy the sret argument into X0 for the return.
// We saved the argument into a virtual register in the entry block,
// so now we copy the value out and into X0.
if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg = AArch64::X0;
Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AArch64::FPR64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
}
SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flag);
}
SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
SelectionDAG &DAG,
unsigned Flag) const {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
}
// (loadGOT sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes instead of using a wrapper node.
return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
}
// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
const unsigned char MO_NC = AArch64II::MO_NC;
return DAG.getNode(
AArch64ISD::WrapperLarge, DL, Ty,
getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
}
// (addlow (adrp %hi(sym)) %lo(sym))
template <class NodeTy>
SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
SDValue Lo = getTargetNode(N, Ty, DAG,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
}
// (adr sym)
template <class NodeTy>
SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
unsigned Flags) const {
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
}
SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GN->getGlobal();
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
if (OpFlags != AArch64II::MO_NO_FLAG)
assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
"unexpected offset in global node");
// This also catches the large code model case for Darwin, and tiny code
// model with got relocations.
if ((OpFlags & AArch64II::MO_GOT) != 0) {
return getGOT(GN, DAG, OpFlags);
}
SDValue Result;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
Result = getAddrLarge(GN, DAG, OpFlags);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
Result = getAddrTiny(GN, DAG, OpFlags);
} else {
Result = getAddr(GN, DAG, OpFlags);
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(GN);
if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
}
/// Convert a TLS address reference into the correct sequence of loads
/// and calls to compute the variable's address (for Darwin, currently) and
/// return an SDValue containing the final node.
/// Darwin only has one TLS scheme which must be capable of dealing with the
/// fully general situation, in the worst case. This means:
/// + "extern __thread" declaration.
/// + Defined in a possibly unknown dynamic library.
///
/// The general system is that each __thread variable has a [3 x i64] descriptor
/// which contains information used by the runtime to calculate the address. The
/// only part of this the compiler needs to know about is the first xword, which
/// contains a function pointer that must be called with the address of the
/// entire descriptor in "x0".
///
/// Since this descriptor may be in a different unit, in general even the
/// descriptor must be accessed via an indirect load. The "ideal" code sequence
/// is:
/// adrp x0, _var@TLVPPAGE
/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
/// ; the function pointer
/// blr x1 ; Uses descriptor address in x0
/// ; Address of _var is now in x0.
///
/// If the address of _var's descriptor *is* known to the linker, then it can
/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
/// a slight efficiency gain.
SDValue
AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"This function expects a Darwin target");
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
Align(PtrMemVT.getSizeInBits() / 8),
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
// Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
// silly).
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getTLSCallPreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
// Finally, we can make the call. This is just a degenerate version of a
// normal AArch64 call node: x0 takes the address of the descriptor, and
// returns the address of the variable in this thread.
Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
/// Convert a thread-local variable reference into a sequence of instructions to
/// compute the variable's address for the local exec TLS model of ELF targets.
/// The sequence depends on the maximum TLS area size.
SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
SDValue ThreadBase,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue TPOff, Addr;
switch (DAG.getTarget().Options.TLSSize) {
default:
llvm_unreachable("Unexpected TLS size");
case 12: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_lo12:a
SDValue Var = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
Var,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 24: {
// mrs x0, TPIDR_EL0
// add x0, x0, :tprel_hi12:a
// add x0, x0, :tprel_lo12_nc:a
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
}
case 32: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g1:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
case 48: {
// mrs x1, TPIDR_EL0
// movz x0, #:tprel_g2:a
// movk x0, #:tprel_g1_nc:a
// movk x0, #:tprel_g0_nc:a
// add x0, x1, x0
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
SDValue MiVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
DAG.getTargetConstant(32, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
DAG.getTargetConstant(16, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
}
}
/// When accessing thread-local variables under either the general-dynamic or
/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
/// have a descriptor, accessible via a PC-relative ADRP, whose first entry
/// is a function pointer that carries out the resolution.
///
/// The sequence is:
/// adrp x0, :tlsdesc:var
/// ldr x1, [x0, #:tlsdesc_lo12:var]
/// add x0, x0, #:tlsdesc_lo12:var
/// .tlsdesccall var
/// blr x1
/// (TPIDR_EL0 offset now in x0)
///
/// The above sequence must be produced unscheduled, to enable the linker to
/// optimize/relax this sequence.
/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
/// above sequence, and expanded really late in the compilation flow, to ensure
/// the sequence is produced as per above.
SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain =
DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
SDValue Glue = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() && "This function expects an ELF target");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
if (Model == TLSModel::LocalDynamic)
Model = TLSModel::GeneralDynamic;
}
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
Model != TLSModel::LocalExec)
report_fatal_error("ELF TLS only supported in small memory model or "
"in local exec TLS model");
// Different choices can be made for the maximum size of the TLS area for a
// module. For the small address model, the default TLS size is 16MiB and the
// maximum TLS size is 4GiB.
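// For example (illustrative): with the small code model, -mtls-size=12
// selects the single-ADD local-exec sequence in LowerELFTLSLocalExec above,
// while -mtls-size=48 selects the three-instruction MOVZ/MOVK form.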
// FIXME: add tiny and large code model support for TLS access models other
// than local exec. We currently generate the same code as small for tiny,
// which may be larger than needed.
SDValue TPOff;
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
if (Model == TLSModel::LocalExec) {
return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
} else if (Model == TLSModel::InitialExec) {
TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
// the beginning of the module's TLS region, followed by a DTPREL offset
// calculation.
// These accesses will need deduplicating if there's more than one.
AArch64FunctionInfo *MFI =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
AArch64II::MO_TLS);
// Now we can calculate the offset from TPIDR_EL0 to this module's
// thread-local area.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
// Now use :dtprel_whatever: operations to calculate this variable's offset
// in its thread-storage area.
SDValue HiVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue LoVar = DAG.getTargetGlobalAddress(
GV, DL, MVT::i64, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
} else if (Model == TLSModel::GeneralDynamic) {
// The call needs a relocation too for linker relaxation. It doesn't make
// sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
// the address.
SDValue SymAddr =
DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
// Finally we can make a call to calculate the offset from tpidr_el0.
TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
} else
llvm_unreachable("Unsupported ELF TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
SDValue
AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
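// The overall lowering built below is roughly (illustrative; exact
// relocation spellings are approximate):
//   ldr  x8, [x18, #0x58]          // TEB->ThreadLocalStoragePointer
//   adrp x9, _tls_index
//   ldr  w9, [x9, :lo12:_tls_index]
//   ldr  x8, [x8, x9, lsl #3]      // this module's TLS block
//   add  x8, x8, :secrel_hi12:var
//   add  x0, x8, :secrel_lo12:var  // address of var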
SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
// Load the ThreadLocalStoragePointer from the TEB
// A pointer to the TLS array is located at offset 0x58 from the TEB.
SDValue TLSArray =
DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
Chain = TLSArray.getValue(1);
// Load the TLS index from the C runtime;
// This does the same as getAddr(), but without having a GlobalAddressSDNode.
// This also does the same as LOADgot, but using a generic i32 load,
// while LOADgot only loads i64.
SDValue TLSIndexHi =
DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
"_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
SDValue TLSIndex =
DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
Chain = TLSIndex.getValue(1);
// The pointer to the thread's TLS data area is at the TLS Index scaled by 8
// offset into the TLSArray.
TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
DAG.getConstant(3, DL, PtrVT));
SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
MachinePointerInfo());
Chain = TLS.getValue(1);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
SDValue TGAHi = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
SDValue TGALo = DAG.getTargetGlobalAddress(
GV, DL, PtrVT, 0,
AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
// Add the offset from the start of the .tls section (section base).
SDValue Addr =
SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
DAG.getTargetConstant(0, DL, MVT::i32)),
0);
Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
return Addr;
}
SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
if (Subtarget->isTargetDarwin())
return LowerDarwinGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetELF())
return LowerELFGlobalTLSAddress(Op, DAG);
if (Subtarget->isTargetWindows())
return LowerWindowsGlobalTLSAddress(Op, DAG);
llvm_unreachable("Unexpected platform trying to use TLS");
}
// Looks through \param Val to determine the bit that can be used to
// check the sign of the value. It returns the unextended value and
// the sign bit position.
std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
return {Val.getOperand(0),
cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
1};
if (Val.getOpcode() == ISD::SIGN_EXTEND)
return {Val.getOperand(0),
Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
return {Val, Val.getValueSizeInBits() - 1};
}
SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
bool ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
// Handle f128 first, since lowering it will result in comparing the return
// value of a libcall against zero, which is just what the rest of LowerBR_CC
// is expecting to deal with.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
// instruction.
if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
return SDValue();
// The actual operation with overflow check.
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
if (CC == ISD::SETNE)
OFCC = getInvertedCondCode(OFCC);
SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Overflow);
}
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
// If the RHS of the comparison is zero, we can potentially fold this
// to a specialized branch.
const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
if (CC == ISD::SETEQ) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
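// For example (illustrative): "if ((x & 4) == 0) goto L" can lower to
// "tbz x0, #2, L" instead of an and/cmp/b.eq sequence.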
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETNE) {
// See if we can use a TBZ to fold in an AND as well.
// TBZ has a smaller branch displacement than CBZ. If the offset is
// out of bounds, a late MI-layer pass rewrites branches.
// 403.gcc is an example that hits this case.
if (LHS.getOpcode() == ISD::AND &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isPowerOf2_64(LHS.getConstantOperandVal(1))) {
SDValue Test = LHS.getOperand(0);
uint64_t Mask = LHS.getConstantOperandVal(1);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
Dest);
}
return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
} else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
}
if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
uint64_t SignBitPos;
std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Cmp);
}
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue BR1 =
DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
Cmp);
}
return BR1;
}
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SelectionDAG &DAG) const {
if (!Subtarget->hasNEON())
return SDValue();
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
SDLoc DL(Op);
SDValue In1 = Op.getOperand(0);
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
if (VT.isScalableVector())
IntVT =
getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
if (VT != In2.getValueType())
return SDValue();
auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
if (VT.isScalableVector())
return getSVESafeBitCast(VT, Op, DAG);
return DAG.getBitcast(VT, Op);
};
SDValue VecVal1, VecVal2;
EVT VecVT;
auto SetVecVal = [&](int Idx = -1) {
if (!VT.isVector()) {
VecVal1 =
DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In1);
VecVal2 =
DAG.getTargetInsertSubreg(Idx, DL, VecVT, DAG.getUNDEF(VecVT), In2);
} else {
VecVal1 = BitCast(VecVT, In1, DAG);
VecVal2 = BitCast(VecVT, In2, DAG);
}
};
if (VT.isVector()) {
VecVT = IntVT;
SetVecVal();
} else if (VT == MVT::f64) {
VecVT = MVT::v2i64;
SetVecVal(AArch64::dsub);
} else if (VT == MVT::f32) {
VecVT = MVT::v4i32;
SetVecVal(AArch64::ssub);
} else if (VT == MVT::f16) {
VecVT = MVT::v8i16;
SetVecVal(AArch64::hsub);
} else {
llvm_unreachable("Invalid type for copysign!");
}
unsigned BitWidth = In1.getScalarValueSizeInBits();
SDValue SignMaskV = DAG.getConstant(~APInt::getSignMask(BitWidth), DL, VecVT);
// We want to materialize a mask with every bit but the high bit set, but the
// AdvSIMD immediate moves cannot materialize that in a single instruction for
// 64-bit elements. Instead, materialize all bits set and then negate that.
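// Concretely (illustrative): bitcasting the all-ones vector to v2f64 and
// applying FNEG flips only each lane's sign bit, leaving 0x7fffffffffffffff,
// i.e. every bit but the high bit set.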
if (VT == MVT::f64 || VT == MVT::v2f64) {
SignMaskV = DAG.getConstant(APInt::getAllOnes(BitWidth), DL, VecVT);
SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, SignMaskV);
SignMaskV = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, SignMaskV);
SignMaskV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, SignMaskV);
}
SDValue BSP =
DAG.getNode(AArch64ISD::BSP, DL, VecVT, SignMaskV, VecVal1, VecVal2);
if (VT == MVT::f16)
return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, BSP);
if (VT == MVT::f32)
return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, BSP);
if (VT == MVT::f64)
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, BSP);
return BitCast(VT, BSP, DAG);
}
SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
SelectionDAG &DAG) const {
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat))
return SDValue();
if (!Subtarget->hasNEON())
return SDValue();
bool IsParity = Op.getOpcode() == ISD::PARITY;
// While there is no integer popcount instruction, it can
// be more efficiently lowered to the following sequence that uses
// AdvSIMD registers/instructions as long as the copies to/from
// the AdvSIMD registers are cheap.
// FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
// CNT V0.8B, V0.8B // 8xbyte pop-counts
// ADDV B0, V0.8B // sum 8xbyte pop-counts
// UMOV X0, V0.B[0] // copy byte result back to integer reg
SDValue Val = Op.getOperand(0);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::i32 || VT == MVT::i64) {
if (VT == MVT::i32)
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (IsParity)
UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
DAG.getConstant(1, DL, MVT::i32));
if (VT == MVT::i64)
UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
return UaddLV;
} else if (VT == MVT::i128) {
Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
SDValue UaddLV = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
if (IsParity)
UaddLV = DAG.getNode(ISD::AND, DL, MVT::i32, UaddLV,
DAG.getConstant(1, DL, MVT::i32));
return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
}
assert(!IsParity && "ISD::PARITY of vector types not supported");
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
"Unexpected type for custom ctpop lowering");
EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
Val = DAG.getBitcast(VT8Bit, Val);
Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
// Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
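// For example (illustrative), for VT == v4i32: the v16i8 CTPOP result is
// widened v16i8 -> v8i16 -> v4i32 by two uaddlp steps.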
unsigned EltSize = 8;
unsigned NumElts = VT.is64BitVector() ? 8 : 16;
while (EltSize != VT.getScalarSizeInBits()) {
EltSize *= 2;
NumElts /= 2;
MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
Val = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
}
return Val;
}
SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()));
SDLoc DL(Op);
SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
ISD::CondCode CC;
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
CC = ISD::SETGT;
break;
case ISD::SMIN:
CC = ISD::SETLT;
break;
case ISD::UMAX:
CC = ISD::SETUGT;
break;
case ISD::UMIN:
CC = ISD::SETULT;
break;
}
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
switch (Opcode) {
default:
llvm_unreachable("Wrong instruction");
case ISD::SMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
case ISD::SMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
case ISD::UMAX:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
case ISD::UMIN:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
}
}
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
SDLoc DL(Op);
SDValue REVB;
MVT VST;
switch (VT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("Invalid type for bitreverse!");
case MVT::v2i32: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v4i32: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
break;
}
case MVT::v1i64: {
VST = MVT::v8i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
case MVT::v2i64: {
VST = MVT::v16i8;
REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
break;
}
}
return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
}
SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op.getValueType().isVector())
return LowerVSETCC(Op, DAG);
bool IsStrict = Op->isStrictFPOpcode();
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
unsigned OpNo = IsStrict ? 1 : 0;
SDValue Chain;
if (IsStrict)
Chain = Op.getOperand(0);
SDValue LHS = Op.getOperand(OpNo + 0);
SDValue RHS = Op.getOperand(OpNo + 1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
SDLoc dl(Op);
// We chose ZeroOrOneBooleanContents, so use zero and one.
EVT VT = Op.getValueType();
SDValue TVal = DAG.getConstant(1, dl, VT);
SDValue FVal = DAG.getConstant(0, dl, VT);
// Handle f128 first, since one possible outcome is a normal integer
// comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
IsSignaling);
// If softenSetCCOperands returned a scalar, use it.
if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
}
}
if (LHS.getValueType().isInteger()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(
LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
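// For example (illustrative): "x == 0" becomes "cmp x0, #0; cset w0, eq",
// where CSET is an alias of CSINC with wzr as both sources.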
SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
// and do the comparison.
SDValue Cmp;
if (IsStrict)
Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
else
Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
SDValue Res;
if (CC2 == AArch64CC::AL) {
changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
CC2);
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
// Note that we inverted the condition above, so we reverse the order of
// the true and false operands here. This will allow the setcc to be
// matched to a single CSINC instruction.
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
} else {
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
// totally clean. Some of them require two CSELs to implement. In this case,
// we emit the first CSEL and then emit a second CSEL using the output
// of the first as the RHS. We're effectively OR'ing the two CC's together.
// FIXME: It would be nice if we could match the two CSELs to two CSINCs.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 =
DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
}
SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue RHS, SDValue TVal,
SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const {
// Handle f128 first, because it will result in a comparison of some RTLIB
// call result against zero.
if (LHS.getValueType() == MVT::f128) {
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
if (!RHS.getNode()) {
RHS = DAG.getConstant(0, dl, LHS.getValueType());
CC = ISD::SETNE;
}
}
// Also handle f16, for which we need to do an f32 comparison.
if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
}
// Next, handle integers.
if (LHS.getValueType().isInteger()) {
assert((LHS.getValueType() == RHS.getValueType()) &&
(LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
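// For example (illustrative): "x > -1 ? 1 : -1" for i32 x becomes
//   asr w8, w0, #31
//   orr w0, w8, #1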
if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
CTVal->isOne() && CFVal->isAllOnes() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, LHS,
DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
}
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
// order to form a CSINV or CSINC out of them.
if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
} else if (TVal.getOpcode() == ISD::XOR) {
// If TVal is a NOT we want to swap TVal and FVal so that we can match
// with a CSINV rather than a CSEL.
if (isAllOnesConstant(TVal.getOperand(1))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (TVal.getOpcode() == ISD::SUB) {
// If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
// that we can match with a CSNEG rather than a CSEL.
if (isNullConstant(TVal.getOperand(0))) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
} else if (CTVal && CFVal) {
const int64_t TrueVal = CTVal->getSExtValue();
const int64_t FalseVal = CFVal->getSExtValue();
bool Swap = false;
// If both TVal and FVal are constants, see if FVal is the
// inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
// instead of a CSEL in that case.
if (TrueVal == ~FalseVal) {
Opcode = AArch64ISD::CSINV;
} else if (FalseVal > std::numeric_limits<int64_t>::min() &&
TrueVal == -FalseVal) {
Opcode = AArch64ISD::CSNEG;
} else if (TVal.getValueType() == MVT::i32) {
// If our operands are only 32-bit wide, make sure we use 32-bit
// arithmetic for the check whether we can use CSINC. This ensures that
// the addition in the check will wrap around properly in case there is
// an overflow (which would not be the case if we do the check with
// 64-bit arithmetic).
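// For example (illustrative): TVal == INT32_MIN and FVal == INT32_MAX
// satisfy TrueVal32 == FalseVal32 + 1 only under 32-bit wrap-around; the
// 64-bit check on the sign-extended values would miss it.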
const uint32_t TrueVal32 = CTVal->getZExtValue();
const uint32_t FalseVal32 = CFVal->getZExtValue();
if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal32 > FalseVal32) {
Swap = true;
}
}
// 64-bit check whether we can use CSINC.
} else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
Opcode = AArch64ISD::CSINC;
if (TrueVal > FalseVal) {
Swap = true;
}
}
// Swap TVal and FVal if necessary.
if (Swap) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
if (Opcode != AArch64ISD::CSEL) {
// Drop FVal since we can get its value by simply inverting/negating
// TVal.
FVal = TVal;
}
}
// Avoid materializing a constant when possible by reusing a known value in
// a register. However, don't perform this optimization if the known value
// is one, zero or negative one in the case of a CSEL. We can always
// materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
!RHSVal->isZero() && !RHSVal->isAllOnes()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
TVal = LHS;
else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
FVal = LHS;
} else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
// Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
// avoid materializing C.
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
Opcode = AArch64ISD::CSINV;
TVal = LHS;
FVal = DAG.getConstant(0, dl, FVal.getValueType());
}
}
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
EVT VT = TVal.getValueType();
return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
// Now we know we're dealing with FP values.
assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
LHS.getValueType() == MVT::f64);
assert(LHS.getValueType() == RHS.getValueType());
EVT VT = TVal.getValueType();
SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
if (DAG.getTarget().Options.UnsafeFPMath) {
// Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
// "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
if (RHSVal && RHSVal->isZero()) {
ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
TVal = LHS;
else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
CFVal && CFVal->isZero() &&
FVal.getValueType() == LHS.getValueType())
FVal = LHS;
}
}
// Emit first, and possibly only, CSEL.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
// If we need a second CSEL, emit it, using the output of the first as the
// RHS. We're effectively OR'ing the two CC's together.
if (CC2 != AArch64CC::AL) {
SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
// Otherwise, return the output of the first CSEL.
return CS1;
}
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
int64_t IdxVal = Idx.getSExtValue();
assert(Ty.isScalableVector() &&
"Only expect scalable vectors for custom lowering of VECTOR_SPLICE");
// We can use the splice instruction for certain index values where we are
// able to efficiently generate the correct predicate. The index will be
// inverted and used directly as the input to the ptrue instruction, i.e.
// -1 -> vl1, -2 -> vl2, etc. The predicate will then be reversed to get the
// splice predicate. However, we can only do this if we can guarantee that
// there are enough elements in the vector, hence we check that the absolute
// index is no larger than the minimum number of elements.
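// For example (illustrative): splice(A, B, -2) on nxv2i64 becomes
//   ptrue  p0.d, vl2
//   rev    p0.d, p0.d
//   splice z0.d, p0, z0.d, z1.d
// taking the last two elements of A followed by leading elements of B.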
Optional<unsigned> PredPattern;
if (Ty.isScalableVector() && IdxVal < 0 &&
(PredPattern = getSVEPredPatternFromNumElements(std::abs(IdxVal))) !=
None) {
SDLoc DL(Op);
// Create a predicate where all but the last -IdxVal elements are false.
EVT PredVT = Ty.changeVectorElementType(MVT::i1);
SDValue Pred = getPTrue(DAG, DL, PredVT, *PredPattern);
Pred = DAG.getNode(ISD::VECTOR_REVERSE, DL, PredVT, Pred);
// Now splice the two inputs together using the predicate.
return DAG.getNode(AArch64ISD::SPLICE, DL, Ty, Pred, Op.getOperand(0),
Op.getOperand(1));
}
// This will select to an EXT instruction, which has a maximum immediate
// value of 255 (a byte offset), hence offsets of up to 2048 bits are the
// most we can lower this way.
if (IdxVal >= 0 &&
IdxVal < int64_t(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
SDValue TVal = Op.getOperand(2);
SDValue FVal = Op.getOperand(3);
SDLoc DL(Op);
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SelectionDAG &DAG) const {
SDValue CCVal = Op->getOperand(0);
SDValue TVal = Op->getOperand(1);
SDValue FVal = Op->getOperand(2);
SDLoc DL(Op);
EVT Ty = Op.getValueType();
if (Ty.isScalableVector()) {
SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
if (useSVEForFixedLengthVectorVT(Ty)) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
// Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
// instruction.
if (ISD::isOverflowIntrOpRes(CCVal)) {
// Only lower legal XALUO ops.
if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
return SDValue();
AArch64CC::CondCode OFCC;
SDValue Value, Overflow;
std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
CCVal, Overflow);
}
// Lower it the same way as we would lower a SELECT_CC node.
ISD::CondCode CC;
SDValue LHS, RHS;
if (CCVal.getOpcode() == ISD::SETCC) {
LHS = CCVal.getOperand(0);
RHS = CCVal.getOperand(1);
CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
} else {
LHS = CCVal;
RHS = DAG.getConstant(0, DL, CCVal.getValueType());
CC = ISD::SETNE;
}
return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
}
SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are PC-relative offsets, so no additional tweaking
// is necessary here. Just get the address of the jump table.
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(JT, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(JT, DAG);
}
return getAddr(JT, DAG);
}
SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
SelectionDAG &DAG) const {
// Jump table entries are PC-relative offsets, so no additional tweaking
// is necessary here. Just get the address of the jump table.
SDLoc DL(Op);
SDValue JT = Op.getOperand(1);
SDValue Entry = Op.getOperand(2);
int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
SDNode *Dest =
DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
SDValue(Dest, 0));
}
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
// Use the GOT for the large code model on iOS.
if (Subtarget->isTargetMachO()) {
return getGOT(CP, DAG);
}
return getAddrLarge(CP, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(CP, DAG);
} else {
return getAddr(CP, DAG);
}
}
SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
if (getTargetMachine().getCodeModel() == CodeModel::Large &&
!Subtarget->isTargetMachO()) {
return getAddrLarge(BA, DAG);
} else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
return getAddrTiny(BA, DAG);
}
return getAddr(BA, DAG);
}
SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
AArch64FunctionInfo *FuncInfo =
DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
}
SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
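// For reference (illustrative), the struct being initialized is:
//   struct va_list {
//     void *__stack;   // offset 0
//     void *__gr_top;  // offset 8  (4 on ILP32)
//     void *__vr_top;  // offset 16 (8 on ILP32)
//     int __gr_offs;   // offset 24 (12 on ILP32)
//     int __vr_offs;   // offset 28 (16 on ILP32)
//   };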
MachineFunction &MF = DAG.getMachineFunction();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue VAList = Op.getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
unsigned Offset = 0;
SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
MachinePointerInfo(SV), Align(PtrSize)));
// void *__gr_top at offset 8 (4 on ILP32)
Offset += PtrSize;
int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
DAG.getConstant(GPRSize, DL, PtrVT));
GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// void *__vr_top at offset 16 (8 on ILP32)
Offset += PtrSize;
int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
DAG.getConstant(FPRSize, DL, PtrVT));
VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
MachinePointerInfo(SV, Offset),
Align(PtrSize)));
}
// int __gr_offs at offset 24 (12 on ILP32)
Offset += PtrSize;
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
// int __vr_offs at offset 28 (16 on ILP32)
Offset += 4;
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Offset, DL, PtrVT));
MemOps.push_back(
DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
else
return LowerAAPCS_VASTART(Op, DAG);
}
SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
SelectionDAG &DAG) const {
// AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
// pointer.
SDLoc DL(Op);
unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
unsigned VaListSize =
(Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
? PtrSize
: Subtarget->isTargetILP32() ? 20 : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
DAG.getConstant(VaListSize, DL, MVT::i32),
Align(PtrSize), false, false, false,
MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}
SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget->isTargetDarwin() &&
"automatic va_arg instruction only works on Darwin");
const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
MaybeAlign Align(Op.getConstantOperandVal(3));
unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
SDValue VAList =
DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
if (VT.isScalableVector())
report_fatal_error("Passing SVE types to variadic functions is "
"currently not supported");
if (Align && *Align > MinSlotSize) {
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align->value() - 1, DL, PtrVT));
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
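// For example (illustrative): va_arg(ap, float) loads the 8-byte slot as an
// f64 and emits an FP_ROUND to f32, while va_arg(ap, int) still advances the
// list by the full slot size.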
if (VT.isInteger() && !VT.isVector())
ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
NeedFPTrunc = true;
}
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
// Load the actual argument out of the pointer VAList
if (NeedFPTrunc) {
// Load the value as an f64.
SDValue WideFP =
DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
// Round the value down to an f32.
SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
DAG.getIntPtrConstant(1, DL));
SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
// Merge the rounded value with the chain output of the load.
return DAG.getMergeValues(Ops, DL);
}
return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
}
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
if (Subtarget->isTargetILP32())
FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
DAG.getValueType(VT));
return FrameAddr;
}
SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT VT = getPointerTy(DAG.getDataLayout());
SDLoc DL(Op);
int FI = MFI.CreateFixedObject(4, 0, false);
return DAG.getFrameIndex(FI, VT);
}
#define GET_REGISTER_MATCHER
#include "AArch64GenAsmMatcher.inc"
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register AArch64TargetLowering::
getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
Register Reg = MatchRegisterName(RegName);
if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
if (!Subtarget->isXRegisterReserved(DwarfRegNum))
Reg = 0;
}
if (Reg)
return Reg;
report_fatal_error(Twine("Invalid register name \""
+ StringRef(RegName) + "\"."));
}
SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
}
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue ReturnAddress;
if (Depth) {
SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
ReturnAddress = DAG.getLoad(
VT, DL, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
} else {
// Return LR, which contains the return address. Mark it an implicit
// live-in.
Register Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
// The XPACLRI instruction assembles to a hint-space instruction before
// Armv8.3-A, so it can safely be used on any pre-Armv8.3-A architecture.
// On Armv8.3-A and onwards XPACI is available, so use that instead.
SDNode *St;
if (Subtarget->hasPAuth()) {
St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
} else {
// XPACLRI operates on LR therefore we must move the operand accordingly.
SDValue Chain =
DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
}
return SDValue(St, 0);
}
/// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
SelectionDAG &DAG) const {
SDValue Lo, Hi;
expandShiftParts(Op.getNode(), Lo, Hi, DAG);
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
bool AArch64TargetLowering::isOffsetFoldingLegal(
const GlobalAddressSDNode *GA) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
}
bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool OptForSize) const {
bool IsLegal = false;
// We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
// and for the 16-bit case when the target has full fp16 support.
// FIXME: We should be able to handle f128 as well with a clever lowering.
const APInt ImmInt = Imm.bitcastToAPInt();
if (VT == MVT::f64)
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f16 && Subtarget->hasFullFP16())
IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
// TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
// generate that fmov.
// If we cannot materialize the value in an fmov immediate field, check
// whether it can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
// cache pressure. The timings are still the same if you consider
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit the expansion to at most 2 instructions
// (more when the target can fuse literals).
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}
LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
<< " imm value: "; Imm.dump(););
return IsLegal;
}
//===----------------------------------------------------------------------===//
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
if ((ST->hasNEON() &&
(VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
VT == MVT::v4f32)) ||
(ST->hasSVE() &&
(VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
// the initial estimate is 2^-8. Thus the number of extra steps to refine
// the result for float (23 mantissa bits) is 2 and for double (52
// mantissa bits) is 3.
ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue
AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue
AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
SelectionDAG &DAG) const {
return Op;
}
SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps,
bool &UseOneConst,
bool Reciprocal) const {
if (Enabled == ReciprocalEstimate::Enabled ||
(Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
// AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
Flags);
Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
if (!Reciprocal)
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const {
if (Enabled == ReciprocalEstimate::Enabled)
if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
DAG, ExtraSteps)) {
SDLoc DL(Operand);
EVT VT = Operand.getValueType();
SDNodeFlags Flags;
Flags.setAllowReassociation(true);
// Newton reciprocal iteration: E * (2 - X * E)
// AArch64 reciprocal iteration instruction: (2 - M * N)
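// E.g. refining 1/4 from the estimate e0 = 0.2: e1 = 0.2 * (2 - 4 * 0.2) =
// 0.24, e2 = 0.24 * (2 - 4 * 0.24) = 0.2496; the relative error squares on
// each step (0.2 -> 0.04 -> 0.0016).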
for (int i = ExtraSteps; i > 0; --i) {
SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
Estimate, Flags);
Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
}
ExtraSteps = 0;
return Estimate;
}
return SDValue();
}
//===----------------------------------------------------------------------===//
// AArch64 Inline Assembly Support
//===----------------------------------------------------------------------===//
// Table of Constraints
// TODO: This is the current set of constraints supported by ARM for the
// compiler; not all of them may make sense.
//
// r - A general register
// w - An FP/SIMD register of some size in the range v0-v31
// x - An FP/SIMD register of some size in the range v0-v15
// I - Constant that can be used with an ADD instruction
// J - Constant that can be used with a SUB instruction
// K - Constant that can be used with a 32-bit logical instruction
// L - Constant that can be used with a 64-bit logical instruction
// M - Constant that can be used as a 32-bit MOV immediate
// N - Constant that can be used as a 64-bit MOV immediate
// Q - A memory reference with base register and no offset
// S - A symbolic address
// Y - Floating point constant zero
// Z - Integer constant zero
//
// Note that general register operands will be output using their 64-bit x
// register name, whatever the size of the variable, unless the asm operand
// is prefixed by the %w modifier. Floating-point and SIMD register operands
// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
// %q modifier.
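// A hypothetical use of these constraints from C (illustrative only):
//   int r;
//   asm("add %w0, %w1, %2" : "=r"(r) : "r"(x), "I"(42));
// Without the %w modifier the "r" operands would print as x-registers; "I"
// accepts 42 because it fits the 12-bit ADD immediate range.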
const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
// At this point, we have to lower this constraint to something else, so we
// lower it to an "r" or "w". However, by doing this we will force the result
// to be in register, while the X constraint is much more permissive.
//
// Although we are correct (we are free to emit anything, without
// constraints), we might break use cases that would expect us to be more
// efficient and emit something else.
if (!Subtarget->hasFPARMv8())
return "r";
if (ConstraintVT.isFloatingPoint())
return "w";
if (ConstraintVT.isVector() &&
(ConstraintVT.getSizeInBits() == 64 ||
ConstraintVT.getSizeInBits() == 128))
return "w";
return "r";
}
enum PredicateConstraint {
Upl,
Upa,
Invalid
};
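// A hypothetical SVE inline-asm use (illustrative), with pg an svbool_t:
//   asm("ptrue %0.b" : "=Upa"(pg));
// "Upa" allows any of p0-p15 to be allocated, while "Upl" restricts the
// choice to the low predicate registers p0-p7.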
static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
PredicateConstraint P = PredicateConstraint::Invalid;
if (Constraint == "Upa")
P = PredicateConstraint::Upa;
if (Constraint == "Upl")
P = PredicateConstraint::Upl;
return P;
}
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default:
break;
case 'x':
case 'w':
case 'y':
return C_RegisterClass;
// An address with a single base register. Due to the way we
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
case 'Y':
case 'Z':
return C_Immediate;
case 'z':
case 'S': // A symbolic address
return C_Other;
}
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
AArch64TargetLowering::getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'x':
case 'w':
case 'y':
if (type->isFloatingPointTy() || type->isVectorTy())
weight = CW_Register;
break;
case 'z':
weight = CW_Constant;
break;
case 'U':
if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
weight = CW_Register;
break;
}
return weight;
}
std::pair<unsigned, const TargetRegisterClass *>
AArch64TargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w': {
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector()) {
if (VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, &AArch64::ZPRRegClass);
return std::make_pair(0U, nullptr);
}
uint64_t VTSize = VT.getFixedSizeInBits();
if (VTSize == 16)
return std::make_pair(0U, &AArch64::FPR16RegClass);
if (VTSize == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
if (VTSize == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
if (VTSize == 128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
}
// The instructions that this constraint is designed for can
// only take 128-bit registers so just use that regclass.
case 'x':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
case 'y':
if (!Subtarget->hasFPARMv8())
break;
if (VT.isScalableVector())
return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
break;
}
} else {
PredicateConstraint PC = parsePredicateConstraint(Constraint);
if (PC != PredicateConstraint::Invalid) {
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return std::make_pair(0U, nullptr);
bool restricted = (PC == PredicateConstraint::Upl);
return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
if (StringRef("{cc}").equals_insensitive(Constraint))
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
// constraint into a member of a register class.
std::pair<unsigned, const TargetRegisterClass *> Res;
Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
// Not found as a standard register?
if (!Res.second) {
unsigned Size = Constraint.size();
if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
int RegNo;
bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
if (!Failed && RegNo >= 0 && RegNo <= 31) {
// v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
// By default we'll emit v0-v31 for this unless there's a modifier where
// we'll emit the correct register as well.
if (VT != MVT::Other && VT.getSizeInBits() == 64) {
Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR64RegClass;
} else {
Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
Res.second = &AArch64::FPR128RegClass;
}
}
}
}
if (Res.second && !Subtarget->hasFPARMv8() &&
!AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
!AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
return std::make_pair(0U, nullptr);
return Res;
}
EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
llvm::Type *Ty,
bool AllowUnknown) const {
if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
return EVT(MVT::i64x8);
return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default:
break;
// This set of constraints deals with valid constants for various instructions.
// Validate and return a target constant for them if we can.
case 'z': {
// 'z' maps to xzr or wzr so it needs an input of 0.
if (!isNullConstant(Op))
return;
if (Op.getValueType() == MVT::i64)
Result = DAG.getRegister(AArch64::XZR, MVT::i64);
else
Result = DAG.getRegister(AArch64::WZR, MVT::i32);
break;
}
case 'S': {
// An absolute symbolic address or label reference.
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
GA->getValueType(0));
} else if (const BlockAddressSDNode *BA =
dyn_cast<BlockAddressSDNode>(Op)) {
Result =
DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
} else
return;
break;
}
case 'I':
case 'J':
case 'K':
case 'L':
case 'M':
case 'N':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
if (!C)
return;
// Grab the value and do some validation.
uint64_t CVal = C->getZExtValue();
switch (ConstraintLetter) {
// The I constraint applies only to simple ADD or SUB immediate operands:
// i.e. 0 to 4095 with optional shift by 12
// The J constraint applies only to ADD or SUB immediates that would be
// valid when negated, i.e. if [an add pattern] were to be output as a SUB
// instruction [or vice versa], in other words -1 to -4095 with optional
// left shift by 12.
case 'I':
if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
break;
return;
case 'J': {
uint64_t NVal = -C->getSExtValue();
if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
CVal = C->getSExtValue();
break;
}
return;
}
// The K and L constraints apply *only* to logical immediates, including
// what used to be the MOVI alias for ORR (though the MOVI alias has now
// been removed and MOV should be used). So these constraints have to
// distinguish between bit patterns that are valid 32-bit or 64-bit
// "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
// not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
// versa.
case 'K':
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
return;
case 'L':
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
return;
// The M and N constraints are a superset of K and L respectively, for use
// with the MOV (immediate) alias. As well as the logical immediates they
// also match 32 or 64-bit immediates that can be loaded either using a
// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
// (M) or 64-bit 0x1234000000000000 (N) etc.
// As a note, some of this code is liberally stolen from the asm parser.
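// For example: 0x12340000 satisfies M as a single MOVZ with shift 16, and
// 0xffffedca satisfies M via MOVN because ~0xffffedca == 0x00001235 fits in
// 16 bits; 0x0001000100010001 fails M (not a 32-bit value) but satisfies N
// as a 64-bit logical immediate.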
case 'M': {
if (!isUInt<32>(CVal))
return;
if (AArch64_AM::isLogicalImmediate(CVal, 32))
break;
if ((CVal & 0xFFFF) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
uint64_t NCVal = ~(uint32_t)CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
return;
}
case 'N': {
if (AArch64_AM::isLogicalImmediate(CVal, 64))
break;
if ((CVal & 0xFFFFULL) == CVal)
break;
if ((CVal & 0xFFFF0000ULL) == CVal)
break;
if ((CVal & 0xFFFF00000000ULL) == CVal)
break;
if ((CVal & 0xFFFF000000000000ULL) == CVal)
break;
uint64_t NCVal = ~CVal;
if ((NCVal & 0xFFFFULL) == NCVal)
break;
if ((NCVal & 0xFFFF0000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF00000000ULL) == NCVal)
break;
if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
break;
return;
}
default:
return;
}
// All assembler immediates are 64-bit integers.
Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
break;
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
//===----------------------------------------------------------------------===//
// AArch64 Advanced SIMD Support
//===----------------------------------------------------------------------===//
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
EVT VT = V64Reg.getValueType();
unsigned NarrowSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
SDLoc DL(V64Reg);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
V64Reg, DAG.getConstant(0, DL, MVT::i64));
}
/// getExtFactor - Determine the adjustment factor for the position when
/// generating an "extract from vector registers" instruction.
static unsigned getExtFactor(SDValue &V) {
EVT EltType = V.getValueType().getVectorElementType();
return EltType.getSizeInBits() / 8;
}
/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
EVT VT = V128Reg.getValueType();
unsigned WideSize = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType().getSimpleVT();
MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
SDLoc DL(V128Reg);
return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
}
// Gather data to see if the operation can be modelled as a
// shuffle in combination with VEXTs.
SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(!VT.isScalableVector() &&
"Scalable vectors cannot be used with ISD::BUILD_VECTOR");
unsigned NumElts = VT.getVectorNumElements();
struct ShuffleSourceInfo {
SDValue Vec;
unsigned MinElt;
unsigned MaxElt;
// We may insert some combination of BITCASTs and VEXT nodes to force Vec to
// be compatible with the shuffle we intend to construct. As a result
// ShuffleVec will be some sliding window into the original Vec.
SDValue ShuffleVec;
// Code should guarantee that element i in Vec starts at element "WindowBase
// + i * WindowScale in ShuffleVec".
int WindowBase;
int WindowScale;
ShuffleSourceInfo(SDValue Vec)
: Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
};
// First gather all vectors used as an immediate source for this BUILD_VECTOR
// node.
SmallVector<ShuffleSourceInfo, 2> Sources;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(V.getOperand(1)) ||
V.getOperand(0).getValueType().isScalableVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: "
"a shuffle can only come from building a vector from "
"various elements of other fixed-width vectors, provided "
"their indices are constant\n");
return SDValue();
}
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
// Update the minimum and maximum lane number seen.
unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
Source->MinElt = std::min(Source->MinElt, EltNo);
Source->MaxElt = std::max(Source->MaxElt, EltNo);
}
// If we have 3 or 4 sources, try to generate a TBL, which will at least be
// better than moving to/from gpr registers for larger vectors.
if ((Sources.size() == 3 || Sources.size() == 4) && NumElts > 4) {
// Construct a mask for the tbl. We may need to adjust the index for types
// larger than i8.
SmallVector<unsigned, 16> Mask;
unsigned OutputFactor = VT.getScalarSizeInBits() / 8;
for (unsigned I = 0; I < NumElts; ++I) {
SDValue V = Op.getOperand(I);
if (V.isUndef()) {
for (unsigned OF = 0; OF < OutputFactor; OF++)
Mask.push_back(-1);
continue;
}
// Set the Mask lanes adjusted for the size of the input and output
// lanes. The Mask is always i8, so it will set OutputFactor lanes per
// output element, adjusted in their positions per input and output types.
unsigned Lane = V.getConstantOperandVal(1);
for (unsigned S = 0; S < Sources.size(); S++) {
if (V.getOperand(0) == Sources[S].Vec) {
unsigned InputSize = Sources[S].Vec.getScalarValueSizeInBits();
unsigned InputBase = 16 * S + Lane * InputSize / 8;
for (unsigned OF = 0; OF < OutputFactor; OF++)
Mask.push_back(InputBase + OF);
break;
}
}
}
// Construct the tbl3/tbl4 out of an intrinsic, the sources converted to
// v16i8, and the TBLMask
SmallVector<SDValue, 16> TBLOperands;
TBLOperands.push_back(DAG.getConstant(Sources.size() == 3
? Intrinsic::aarch64_neon_tbl3
: Intrinsic::aarch64_neon_tbl4,
dl, MVT::i32));
for (unsigned i = 0; i < Sources.size(); i++) {
SDValue Src = Sources[i].Vec;
EVT SrcVT = Src.getValueType();
Src = DAG.getBitcast(SrcVT.is64BitVector() ? MVT::v8i8 : MVT::v16i8, Src);
assert((SrcVT.is64BitVector() || SrcVT.is128BitVector()) &&
"Expected a legally typed vector");
if (SrcVT.is64BitVector())
Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, Src,
DAG.getUNDEF(MVT::v8i8));
TBLOperands.push_back(Src);
}
SmallVector<SDValue, 16> TBLMask;
for (unsigned i = 0; i < Mask.size(); i++)
TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
assert((Mask.size() == 8 || Mask.size() == 16) &&
"Expected a v8i8 or v16i8 Mask");
TBLOperands.push_back(
DAG.getBuildVector(Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
SDValue Shuffle =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
return DAG.getBitcast(VT, Shuffle);
}
if (Sources.size() > 2) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: currently only do something "
<< "sensible when at most two source vectors are "
<< "involved\n");
return SDValue();
}
// Find out the smallest element size among result and two sources, and use
// it as element size to build the shuffle_vector.
EVT SmallestEltTy = VT.getVectorElementType();
for (auto &Source : Sources) {
EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
if (SrcEltTy.bitsLT(SmallestEltTy)) {
SmallestEltTy = SrcEltTy;
}
}
unsigned ResMultiplier =
VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
uint64_t VTSize = VT.getFixedSizeInBits();
NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
// If the source vector is too wide or too narrow, we may nevertheless be able
// to construct a compatible shuffle either by concatenating it with UNDEF or
// extracting a suitable range of elements.
for (auto &Src : Sources) {
EVT SrcVT = Src.ShuffleVec.getValueType();
TypeSize SrcVTSize = SrcVT.getSizeInBits();
if (SrcVTSize == TypeSize::Fixed(VTSize))
continue;
// This stage of the search produces a source with the same element type as
// the original, but with a total width matching the BUILD_VECTOR output.
EVT EltVT = SrcVT.getVectorElementType();
unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
if (SrcVTSize.getFixedValue() < VTSize) {
assert(2 * SrcVTSize == VTSize);
// We can pad out the smaller vector for free, so if it's part of a
// shuffle...
Src.ShuffleVec =
DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
DAG.getUNDEF(Src.ShuffleVec.getValueType()));
continue;
}
if (SrcVTSize.getFixedValue() != 2 * VTSize) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: result vector too small to extract\n");
return SDValue();
}
if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
return SDValue();
}
if (Src.MinElt >= NumSrcElts) {
// The extraction can just take the second half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
Src.WindowBase = -NumSrcElts;
} else if (Src.MaxElt < NumSrcElts) {
// The extraction can just take the first half
Src.ShuffleVec =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
} else {
// An actual VEXT is needed
SDValue VEXTSrc1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(0, dl, MVT::i64));
SDValue VEXTSrc2 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
DAG.getConstant(NumSrcElts, dl, MVT::i64));
unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
if (!SrcVT.is64BitVector()) {
LLVM_DEBUG(
dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
"for SVE vectors.");
return SDValue();
}
Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
VEXTSrc2,
DAG.getConstant(Imm, dl, MVT::i32));
Src.WindowBase = -Src.MinElt;
}
}
// Another possible incompatibility occurs from the vector element types. We
// can fix this by bitcasting the source vectors to the same type we intend
// for the shuffle.
for (auto &Src : Sources) {
EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
if (SrcEltTy == SmallestEltTy)
continue;
assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
Src.WindowScale =
SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
Src.WindowBase *= Src.WindowScale;
}
// Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
// starting at the appropriate offset.
int *LaneMask = &Mask[i * ResMultiplier];
int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
ExtractBase += NumElts * (Src - Sources.begin());
for (int j = 0; j < LanesDefined; ++j)
LaneMask[j] = ExtractBase + j;
}
// Final check before we try to produce nonsense...
if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
return SDValue();
}
SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
for (unsigned i = 0; i < Sources.size(); ++i)
ShuffleOps[i] = Sources[i].ShuffleVec;
SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
ShuffleOps[1], Mask);
SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
dbgs() << "Reshuffle, creating node: "; V.dump(););
return V;
}
// Check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are the same.
static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
if (M[0] < 0)
return false;
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
// element. The other shuffle indices must be the successive elements after
// the first one.
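// E.g. for v8i8 the mask <3, 4, 5, 6, 7, 0, 1, 2> is accepted with Imm = 3;
// undef entries may appear anywhere in the sequence.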
unsigned ExpectedElt = Imm;
for (unsigned i = 1; i < NumElts; ++i) {
// Increment the expected index. If it wraps around, just follow it
// back to index zero and keep going.
++ExpectedElt;
if (ExpectedElt == NumElts)
ExpectedElt = 0;
if (M[i] < 0)
continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
return true;
}
// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
// v4i32s. This is really a truncate, which we can construct out of (legal)
// concats and truncate nodes.
static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) {
if (V.getValueType() != MVT::v16i8)
return SDValue();
assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR");
for (unsigned X = 0; X < 4; X++) {
// Check the first item in each group is an extract from lane 0 of a v4i32
// or v4i16.
SDValue BaseExt = V.getOperand(X * 4);
if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
(BaseExt.getOperand(0).getValueType() != MVT::v4i16 &&
BaseExt.getOperand(0).getValueType() != MVT::v4i32) ||
!isa<ConstantSDNode>(BaseExt.getOperand(1)) ||
BaseExt.getConstantOperandVal(1) != 0)
return SDValue();
SDValue Base = BaseExt.getOperand(0);
// And check the other items are extracts from the same vector.
for (unsigned Y = 1; Y < 4; Y++) {
SDValue Ext = V.getOperand(X * 4 + Y);
if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Ext.getOperand(0) != Base ||
!isa<ConstantSDNode>(Ext.getOperand(1)) ||
Ext.getConstantOperandVal(1) != Y)
return SDValue();
}
}
// Turn the buildvector into a series of truncates and concats, which will
// become uzp1's. Any v4i32s we found get truncated to v4i16, which are
// concatenated in pairs to produce two v8i16s. These are both truncated to
// v8i8 and concatenated together.
SDLoc DL(V);
SDValue Trunc[4] = {
V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0),
V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)};
for (int I = 0; I < 4; I++)
if (Trunc[I].getValueType() == MVT::v4i32)
Trunc[I] = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, Trunc[I]);
SDValue Concat0 =
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]);
SDValue Concat1 =
DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]);
SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0);
SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1);
}
/// Check if a vector shuffle corresponds to a DUP instruction with a larger
/// element width than the vector lane type. If that is the case, the function
/// returns true and writes the value of the DUP instruction lane operand into
/// DupLaneOp.
static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
unsigned &DupLaneOp) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for wide DUP are: 16, 32, 64");
if (BlockSize <= VT.getScalarSizeInBits())
return false;
if (BlockSize % VT.getScalarSizeInBits() != 0)
return false;
if (VT.getSizeInBits() % BlockSize != 0)
return false;
size_t SingleVecNumElements = VT.getVectorNumElements();
size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
size_t NumBlocks = VT.getSizeInBits() / BlockSize;
// We are looking for masks like
// [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
// might be replaced by 'undefined'. BlockIndices will eventually contain
// lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
// for the above examples)
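// E.g. for v8i16 with BlockSize == 32, the mask <2, 3, 2, 3, 2, 3, 2, 3>
// duplicates the 32-bit block spanning lanes {2,3}, giving DupLaneOp == 1.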
SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
for (size_t I = 0; I < NumEltsPerBlock; I++) {
int Elt = M[BlockIndex * NumEltsPerBlock + I];
if (Elt < 0)
continue;
// For now we don't support shuffles that use the second operand
if ((unsigned)Elt >= SingleVecNumElements)
return false;
if (BlockElts[I] < 0)
BlockElts[I] = Elt;
else if (BlockElts[I] != Elt)
return false;
}
// We found a candidate block (possibly with some undefs). It must be a
// sequence of consecutive integers starting with a value divisible by
// NumEltsPerBlock, with some values possibly replaced by undefs.
// Find first non-undef element
auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
assert(FirstRealEltIter != BlockElts.end() &&
"Shuffle with all-undefs must have been caught by previous cases, "
"e.g. isSplat()");
if (FirstRealEltIter == BlockElts.end()) {
DupLaneOp = 0;
return true;
}
// Index of FirstRealElt in BlockElts
size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
if ((unsigned)*FirstRealEltIter < FirstRealIndex)
return false;
// BlockElts[0] must have the following value if it isn't undef:
size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
// Check the first element
if (Elt0 % NumEltsPerBlock != 0)
return false;
// Check that the sequence indeed consists of consecutive integers (modulo
// undefs)
for (size_t I = 0; I < NumEltsPerBlock; I++)
if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
return false;
DupLaneOp = Elt0 / NumEltsPerBlock;
return true;
}
// Check if an EXT instruction can handle the shuffle mask when the
// vector sources of the shuffle are different.
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
unsigned &Imm) {
// Look for the first non-undef element.
const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
// Benefit from APInt to handle overflow when calculating the expected element.
unsigned NumElts = VT.getVectorNumElements();
unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
// The following shuffle indices must be the successive elements after the
// first real element.
const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
[&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
if (FirstWrongElt != M.end())
return false;
// The index of an EXT is the first element if it is not UNDEF.
// Watch out for the beginning UNDEFs. The EXT index should be the expected
// value of the first element. E.g.
// <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
// <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
// ExpectedElt is the last mask index plus 1.
Imm = ExpectedElt.getZExtValue();
// There are two different cases that require reversing the input vectors.
// For example, for vector <4 x i32> we have the following cases,
// Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
// Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
// For both cases, we finally use mask <5, 6, 7, 0>, which requires
// to reverse two input vectors.
if (Imm < NumElts)
ReverseEXT = true;
else
Imm -= NumElts;
return true;
}
/// isREVMask - Check if a vector shuffle corresponds to a REV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
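/// E.g. for v8i8 with BlockSize == 32 the expected mask is
/// <3, 2, 1, 0, 7, 6, 5, 4>.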
static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
// If the first shuffle index is UNDEF, be optimistic.
if (M[0] < 0)
BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
return false;
}
return true;
}
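// E.g. for v4i32 a ZIP1 mask is <0, 4, 1, 5> (WhichResult == 0) and a ZIP2
// mask is <2, 6, 3, 7> (WhichResult == 1).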
static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
return false;
Idx += 1;
}
return true;
}
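// E.g. for v4i32 a UZP1 mask is <0, 2, 4, 6> (WhichResult == 0) and a UZP2
// mask is <1, 3, 5, 7> (WhichResult == 1).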
static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
if (M[i] < 0)
continue; // ignore UNDEF indices
if ((unsigned)M[i] != 2 * i + WhichResult)
return false;
}
return true;
}
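// E.g. for v4i32 a TRN1 mask is <0, 4, 2, 6> (WhichResult == 0) and a TRN2
// mask is <1, 5, 3, 7> (WhichResult == 1).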
static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
return false;
}
return true;
}
/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
return false;
Idx += 1;
}
return true;
}
/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned Half = VT.getVectorNumElements() / 2;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
int MIdx = M[i + j * Half];
if (MIdx >= 0 && (unsigned)MIdx != Idx)
return false;
Idx += 2;
}
}
return true;
}
/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
(M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
return false;
}
return true;
}
static bool isINSMask(ArrayRef<int> M, int NumInputElements,
bool &DstIsLeft, int &Anomaly) {
if (M.size() != static_cast<size_t>(NumInputElements))
return false;
int NumLHSMatch = 0, NumRHSMatch = 0;
int LastLHSMismatch = -1, LastRHSMismatch = -1;
for (int i = 0; i < NumInputElements; ++i) {
if (M[i] == -1) {
++NumLHSMatch;
++NumRHSMatch;
continue;
}
if (M[i] == i)
++NumLHSMatch;
else
LastLHSMismatch = i;
if (M[i] == i + NumInputElements)
++NumRHSMatch;
else
LastRHSMismatch = i;
}
if (NumLHSMatch == NumInputElements - 1) {
DstIsLeft = true;
Anomaly = LastLHSMismatch;
return true;
} else if (NumRHSMatch == NumInputElements - 1) {
DstIsLeft = false;
Anomaly = LastRHSMismatch;
return true;
}
return false;
}
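// E.g. for v4i32 with SplitLHS, <0, 1, 4, 5> concatenates the low halves of
// the two inputs.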
static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
if (VT.getSizeInBits() != 128)
return false;
unsigned NumElts = VT.getVectorNumElements();
for (int I = 0, E = NumElts / 2; I != E; I++) {
if (Mask[I] != I)
return false;
}
int Offset = NumElts / 2;
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
if (Mask[I] != I + SplitLHS * Offset)
return false;
}
return true;
}
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
bool SplitV0 = V0.getValueSizeInBits() == 128;
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
if (SplitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
}
if (V1.getValueSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, DL, MVT::i64));
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle. ID is the perfect-shuffle
/// ID, V1 and V2 are the original shuffle inputs, PFEntry is the perfect-shuffle
/// table entry, and LHS/RHS are the immediate inputs for this stage of the
/// shuffle.
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
SDValue V2, unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
const SDLoc &dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
enum {
OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
OP_VREV,
OP_VDUP0,
OP_VDUP1,
OP_VDUP2,
OP_VDUP3,
OP_VEXT1,
OP_VEXT2,
OP_VEXT3,
OP_VUZPL, // VUZP, left result
OP_VUZPR, // VUZP, right result
OP_VZIPL, // VZIP, left result
OP_VZIPR, // VZIP, right result
OP_VTRNL, // VTRN, left result
OP_VTRNR, // VTRN, right result
OP_MOVLANE // Move lane. RHSID is the lane to move into
};
if (OpNum == OP_COPY) {
if (LHSID == (1 * 9 + 2) * 9 + 3)
return LHS;
assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
return RHS;
}
if (OpNum == OP_MOVLANE) {
// Decompose a PerfectShuffle ID to get the Mask for lane Elt
auto getPFIDLane = [](unsigned ID, int Elt) -> int {
assert(Elt < 4 && "Expected Perfect Lanes to be less than 4");
Elt = 3 - Elt;
while (Elt > 0) {
ID /= 9;
Elt--;
}
return (ID % 9 == 8) ? -1 : ID % 9;
};
// For OP_MOVLANE shuffles, the RHSID represents the lane to move into. The
// lane to move from is taken from the PFID and always refers to one of the
// original vectors (V1 or V2).
SDValue OpLHS = GeneratePerfectShuffle(
LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
assert(RHSID < 8 && "Expected a lane index for RHSID!");
unsigned ExtLane = 0;
SDValue Input;
// OP_MOVLANE is either a D mov (if bit 0x4 is set) or an S mov. D movs
// convert into a higher type.
if (RHSID & 0x4) {
int MaskElt = getPFIDLane(ID, (RHSID & 0x01) << 1) >> 1;
if (MaskElt == -1)
MaskElt = (getPFIDLane(ID, ((RHSID & 0x01) << 1) + 1) - 1) >> 1;
assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
ExtLane = MaskElt < 2 ? MaskElt : (MaskElt - 2);
Input = MaskElt < 2 ? V1 : V2;
if (VT.getScalarSizeInBits() == 16) {
Input = DAG.getBitcast(MVT::v2f32, Input);
OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
} else {
assert(VT.getScalarSizeInBits() == 32 &&
"Expected 16 or 32 bit shuffle elements");
Input = DAG.getBitcast(MVT::v2f64, Input);
OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
}
} else {
int MaskElt = getPFIDLane(ID, RHSID);
assert(MaskElt >= 0 && "Didn't expect an undef movlane index!");
ExtLane = MaskElt < 4 ? MaskElt : (MaskElt - 4);
Input = MaskElt < 4 ? V1 : V2;
// Be careful about creating illegal types. Use f16 instead of i16.
if (VT == MVT::v4i16) {
Input = DAG.getBitcast(MVT::v4f16, Input);
OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
}
}
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
Input.getValueType().getVectorElementType(),
Input, DAG.getVectorIdxConstant(ExtLane, dl));
SDValue Ins =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
return DAG.getBitcast(VT, Ins);
}
SDValue OpLHS, OpRHS;
OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
RHS, DAG, dl);
OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
RHS, DAG, dl);
EVT VT = OpLHS.getValueType();
switch (OpNum) {
default:
llvm_unreachable("Unknown shuffle opcode!");
case OP_VREV:
// VREV divides the vector in half and swaps within the half.
if (VT.getVectorElementType() == MVT::i32 ||
VT.getVectorElementType() == MVT::f32)
return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
// vrev <4 x i16> -> REV32
if (VT.getVectorElementType() == MVT::i16 ||
VT.getVectorElementType() == MVT::f16 ||
VT.getVectorElementType() == MVT::bf16)
return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
// vrev <4 x i8> -> REV16
assert(VT.getVectorElementType() == MVT::i8);
return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
case OP_VDUP0:
case OP_VDUP1:
case OP_VDUP2:
case OP_VDUP3: {
EVT EltTy = VT.getVectorElementType();
unsigned Opcode;
if (EltTy == MVT::i8)
Opcode = AArch64ISD::DUPLANE8;
else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
Opcode = AArch64ISD::DUPLANE16;
else if (EltTy == MVT::i32 || EltTy == MVT::f32)
Opcode = AArch64ISD::DUPLANE32;
else if (EltTy == MVT::i64 || EltTy == MVT::f64)
Opcode = AArch64ISD::DUPLANE64;
else
llvm_unreachable("Invalid vector element type?");
if (VT.getSizeInBits() == 64)
OpLHS = WidenVector(OpLHS, DAG);
SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
}
case OP_VEXT1:
case OP_VEXT2:
case OP_VEXT3: {
unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VUZPR:
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPL:
return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VZIPR:
return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNL:
return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
case OP_VTRNR:
return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
OpRHS);
}
}
static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the TBL instruction.
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc DL(Op);
EVT EltVT = Op.getValueType().getVectorElementType();
unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
bool Swap = false;
if (V1.isUndef() || isZerosVector(V1.getNode())) {
std::swap(V1, V2);
Swap = true;
}
// If the V2 source is undef or zero then we can use a tbl1, as tbl1 will fill
// out of range values with 0s. We do need to make sure that any out-of-range
// values are really out-of-range for a v16i8 vector.
bool IsUndefOrZero = V2.isUndef() || isZerosVector(V2.getNode());
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
if (Op.getValueSizeInBits() == 128) {
IndexVT = MVT::v16i8;
IndexLen = 16;
}
SmallVector<SDValue, 8> TBLMask;
for (int Val : ShuffleMask) {
for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
unsigned Offset = Byte + Val * BytesPerElt;
if (Swap)
Offset = Offset < IndexLen ? Offset + IndexLen : Offset - IndexLen;
if (IsUndefOrZero && Offset >= IndexLen)
Offset = 255;
TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
}
}
SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
SDValue Shuffle;
if (IsUndefOrZero) {
if (IndexLen == 8)
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
if (IndexLen == 8) {
V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
} else {
// FIXME: We cannot, for the moment, emit a TBL2 instruction because we
// cannot currently represent the register constraints on the input
// table registers.
// Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
// DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
// IndexLen));
Shuffle = DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
V2Cst, DAG.getBuildVector(IndexVT, DL,
makeArrayRef(TBLMask.data(), IndexLen)));
}
}
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
}
static unsigned getDUPLANEOp(EVT EltType) {
if (EltType == MVT::i8)
return AArch64ISD::DUPLANE8;
if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
return AArch64ISD::DUPLANE16;
if (EltType == MVT::i32 || EltType == MVT::f32)
return AArch64ISD::DUPLANE32;
if (EltType == MVT::i64 || EltType == MVT::f64)
return AArch64ISD::DUPLANE64;
llvm_unreachable("Invalid vector element type?");
}
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
unsigned Opcode, SelectionDAG &DAG) {
// Try to eliminate a bitcasted extract subvector before a DUPLANE.
auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
// Match: dup (bitcast (extract_subv X, C)), LaneC
if (BitCast.getOpcode() != ISD::BITCAST ||
BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
// The extract index must align in the destination type. That may not
// happen if the bitcast is from narrow to wide type.
SDValue Extract = BitCast.getOperand(0);
unsigned ExtIdx = Extract.getConstantOperandVal(1);
unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
if (ExtIdxInBits % CastedEltBitWidth != 0)
return false;
// Can't handle cases where vector size is not 128-bit
if (!Extract.getOperand(0).getValueType().is128BitVector())
return false;
// Update the lane value by offsetting with the scaled extract index.
LaneC += ExtIdxInBits / CastedEltBitWidth;
// Determine the casted vector type of the wide vector input.
// dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
// Examples:
// dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
// dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
unsigned SrcVecNumElts =
Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
SrcVecNumElts);
return true;
};
MVT CastVT;
if (getScaledOffsetDup(V, Lane, CastVT)) {
V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
V.getOperand(0).getValueType().is128BitVector()) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
Lane += V.getConstantOperandVal(1);
V = V.getOperand(0);
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
Lane -= Idx * VT.getVectorNumElements() / 2;
V = WidenVector(V.getOperand(Idx), DAG);
} else if (VT.getSizeInBits() == 64) {
// Widen the operand to 128-bit register with undef.
V = WidenVector(V, DAG);
}
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
// Return true if we can get a new shuffle mask by checking whether every pair
// of adjacent mask values in the parameter mask array is consecutive and
// starts at an even number.
static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
SmallVectorImpl<int> &NewMask) {
unsigned NumElts = VT.getVectorNumElements();
if (NumElts % 2 != 0)
return false;
NewMask.clear();
for (unsigned i = 0; i < NumElts; i += 2) {
int M0 = M[i];
int M1 = M[i + 1];
// If both elements are undef, new mask is undef too.
if (M0 == -1 && M1 == -1) {
NewMask.push_back(-1);
continue;
}
if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
NewMask.push_back(M1 / 2);
continue;
}
if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
NewMask.push_back(M0 / 2);
continue;
}
NewMask.clear();
return false;
}
assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
return true;
}
// Try to widen the element type to get a new mask value for a better
// permutation sequence, so that we can use NEON shuffle instructions, such as
// ZIP1/2, UZP1/2, TRN1/2, REV, INS, etc.
// For example:
// shufflevector <4 x i32> %a, <4 x i32> %b,
// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
// is equivalent to:
// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
// Finally, we can get:
// mov v0.d[0], v1.d[1]
static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ScalarVT = VT.getVectorElementType();
unsigned ElementSize = ScalarVT.getFixedSizeInBits();
SDValue V0 = Op.getOperand(0);
SDValue V1 = Op.getOperand(1);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
// If we are combining adjacent elements, e.g. two i16s into one i32 or two
// i32s into one i64, we need to make sure the wider element type is legal.
// Thus ElementSize must be no larger than 32 bits, and the i1 type must also
// be excluded.
if (ElementSize > 32 || ElementSize == 1)
return SDValue();
SmallVector<int, 8> NewMask;
if (isWideTypeMask(Mask, VT, NewMask)) {
MVT NewEltVT = VT.isFloatingPoint()
? MVT::getFloatingPointVT(ElementSize * 2)
: MVT::getIntegerVT(ElementSize * 2);
MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
V0 = DAG.getBitcast(NewVT, V0);
V1 = DAG.getBitcast(NewVT, V1);
return DAG.getBitcast(VT,
DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
// during code selection. This is more efficient and avoids the possibility
// of inconsistencies between legalization and selection.
ArrayRef<int> ShuffleMask = SVN->getMask();
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
assert(ShuffleMask.size() == VT.getVectorNumElements() &&
"Unexpected VECTOR_SHUFFLE mask size!");
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
// If this is an undef splat, generate it via "just" vdup, if possible.
if (Lane == -1)
Lane = 0;
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
V1.getOperand(0));
// Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
// constant. If so, we can just reference the lane's definition directly.
if (V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(Lane)))
return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
// Otherwise, duplicate from the lane of the input vector.
unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
return constructDup(V1, Lane, dl, VT, Opcode, DAG);
}
// Check if the mask matches a DUP for a wider element
for (unsigned LaneSize : {64U, 32U, 16U}) {
unsigned Lane = 0;
if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
: LaneSize == 32 ? AArch64ISD::DUPLANE32
: AArch64ISD::DUPLANE16;
// Cast V1 to an integer vector with required lane size
MVT NewEltTy = MVT::getIntegerVT(LaneSize);
unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
V1 = DAG.getBitcast(NewVecTy, V1);
// Construct the DUP instruction
V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
// Cast back to the original type
return DAG.getBitcast(VT, V1);
}
}
if (isREVMask(ShuffleMask, VT, 64))
return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 32))
return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
if (isREVMask(ShuffleMask, VT, 16))
return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
(VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
ShuffleVectorInst::isReverseMask(ShuffleMask)) {
SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
DAG.getConstant(8, dl, MVT::i32));
}
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
if (ReverseEXT)
std::swap(V1, V2);
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
DAG.getConstant(Imm, dl, MVT::i32));
} else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
Imm *= getExtFactor(V1);
return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
}
if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
return Concat;
bool DstIsLeft;
int Anomaly;
int NumInputElements = V1.getValueType().getVectorNumElements();
if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
SDValue DstVec = DstIsLeft ? V1 : V2;
SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
SDValue SrcVec = V1;
int SrcLane = ShuffleMask[Anomaly];
if (SrcLane >= NumInputElements) {
SrcVec = V2;
SrcLane -= VT.getVectorNumElements();
}
SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
EVT ScalarVT = VT.getVectorElementType();
if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
ScalarVT = MVT::i32;
return DAG.getNode(
ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
DstLaneV);
}
if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
return NewSD;
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
PFIndexes[i] = 8;
else
PFIndexes[i] = ShuffleMask[i];
}
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
PFIndexes[2] * 9 + PFIndexes[3];
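// Worked example (illustrative): the v4i16 mask <0,4,1,5> gives
// PFIndexes = {0,4,1,5} and PFTableIndex = 0*729 + 4*81 + 1*9 + 5 = 338.
// Undef lanes contribute the out-of-range digit 8 in this base-9 encoding.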
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
dl);
}
return GenerateTBL(Op, ShuffleMask, DAG);
}
SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (useSVEForFixedLengthVectorVT(VT))
return LowerToScalableOp(Op, DAG);
assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
"Unexpected vector type!");
// We can handle the constant cases during isel.
if (isa<ConstantSDNode>(Op.getOperand(0)))
return Op;
// There isn't a natural way to handle the general i1 case, so we use some
// trickery with whilelo.
SDLoc DL(Op);
SDValue SplatVal = DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, MVT::i64);
SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, SplatVal,
DAG.getValueType(MVT::i1));
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
if (VT == MVT::nxv1i1)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::nxv1i1,
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv2i1, ID,
Zero, SplatVal),
Zero);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, ID, Zero, SplatVal);
}
SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (!isTypeLegal(VT) || !VT.isScalableVector())
return SDValue();
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
// The DUPQ operation is independent of element type, so normalise to i64s.
SDValue Idx128 = Op.getOperand(2);
// DUPQ can be used when idx is in range.
auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
if (CIdx && (CIdx->getZExtValue() <= 3)) {
SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
return DAG.getNode(AArch64ISD::DUPLANE128, DL, VT, Op.getOperand(1), CI);
}
SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
// The ACLE says this must produce the same result as:
// svtbl(data, svadd_x(svptrue_b64(),
// svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
// index * 2))
SDValue One = DAG.getConstant(1, DL, MVT::i64);
SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
// Create the vector 0,1,0,1,...
SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
// Create the vector idx64,idx64+1,idx64,idx64+1,...
SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
// Create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
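// Illustrative example: for index 1 the mask computed above is <2,3,2,3,...>,
// so the TBL picks 64-bit lanes 2 and 3 (the second 128-bit quadword) and
// replicates them across the whole vector.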
return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
}
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
APInt &UndefBits) {
EVT VT = BVN->getValueType(0);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
for (unsigned i = 0; i < NumSplats; ++i) {
CnstBits <<= SplatBitSize;
UndefBits <<= SplatBitSize;
CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
}
return true;
}
return false;
}
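// Illustrative example: for a v4i32 vector whose every lane is 0x01020304,
// isConstantSplat reports SplatBitSize == 32 and the loop above replicates
// the pattern four times, leaving the 128-bit constant
// 0x01020304'01020304'01020304'01020304 in CnstBits.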
// Try 64-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
Shift = 8;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
Shift = 16;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
Shift = 24;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 16-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
Shift = 0;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
Shift = 8;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov;
if (LHS)
Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 32-bit splatted SIMD immediate with shifted ones.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
SelectionDAG &DAG, const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
Shift = 264;
}
else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
Shift = 272;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try 8-bit splatted SIMD immediate.
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
// Try FP splatted SIMD immediate.
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits) {
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
bool isWide = (VT.getSizeInBits() == 128);
MVT MovTy;
bool isAdvSIMDModImm = false;
if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
}
else if (isWide &&
(isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
MovTy = MVT::v2f64;
}
if (isAdvSIMDModImm) {
SDLoc dl(Op);
SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
DAG.getConstant(Value, dl, MVT::i32));
return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
}
}
return SDValue();
}
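// Illustrative example: a v4f32 splat of 1.0f has a bit pattern accepted by
// isAdvSIMDModImmType11, so it lowers to a single FMOV with an 8-bit encoded
// immediate instead of a constant-pool load.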
// Specialized code to quickly find if PotentialBVec is a BuildVector that
// consists of only the same constant int value, returned in reference arg
// ConstVal
static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
uint64_t &ConstVal) {
BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
if (!Bvec)
return false;
ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
if (!FirstElt)
return false;
EVT VT = Bvec->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
for (unsigned i = 1; i < NumElts; ++i)
if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
return false;
ConstVal = FirstElt->getZExtValue();
return true;
}
// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
// BUILD_VECTOR with constant element C1, C2 is a constant, and:
// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isVector())
return SDValue();
SDLoc DL(N);
SDValue And;
SDValue Shift;
SDValue FirstOp = N->getOperand(0);
unsigned FirstOpc = FirstOp.getOpcode();
SDValue SecondOp = N->getOperand(1);
unsigned SecondOpc = SecondOp.getOpcode();
// Is one of the operands an AND or a BICi? The AND may have been optimised to
// a BICi in order to use an immediate instead of a register.
// Is the other operand a shl or lshr? This will have been turned into:
// AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
(SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
And = FirstOp;
Shift = SecondOp;
} else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
(FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
And = SecondOp;
Shift = FirstOp;
} else
return SDValue();
bool IsAnd = And.getOpcode() == ISD::AND;
bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
// Is the shift amount constant?
ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
if (!C2node)
return SDValue();
uint64_t C1;
if (IsAnd) {
// Is the and mask vector all constant?
if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
} else {
// Reconstruct the corresponding AND immediate from the two BICi immediates.
ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
assert(C1nodeImm && C1nodeShift);
C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
}
// Is C1 == ~(Ones(ElemSizeInBits) << C2) or
// C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
// how much one can shift elements of a particular size?
uint64_t C2 = C2node->getZExtValue();
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
APInt C1AsAPInt(ElemSizeInBits, C1);
APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
: APInt::getLowBitsSet(ElemSizeInBits, C2);
if (C1AsAPInt != RequiredC1)
return SDValue();
SDValue X = And.getOperand(0);
SDValue Y = Shift.getOperand(0);
unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
LLVM_DEBUG(N->dump(&DAG));
LLVM_DEBUG(dbgs() << "into: \n");
LLVM_DEBUG(ResultSLI->dump(&DAG));
++NumShiftInserts;
return ResultSLI;
}
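// Worked example (illustrative): for v4i32 with C2 == 8, the SLI case
// requires C1 == ~(0xFFFFFFFF << 8) == 0x000000FF. The AND keeps the low 8
// bits of X while (Y << 8) supplies the rest, which is exactly what
// "sli v0.4s, v1.4s, #8" computes in one instruction.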
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
return Res;
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
BuildVectorSDNode *BVN =
dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
if (!BVN) {
// OR commutes, so try swapping the operands.
LHS = Op.getOperand(1);
BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
}
if (!BVN)
return Op;
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
DefBits, &LHS)))
return NewOp;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
UndefBits, &LHS)))
return NewOp;
}
// We can always fall back to a non-immediate OR.
return Op;
}
// Normalize the operands of BUILD_VECTOR. The value of constant operands will
// be truncated to fit element width.
static SDValue NormalizeBuildVector(SDValue Op,
SelectionDAG &DAG) {
assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltTy = VT.getVectorElementType();
if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
return Op;
SmallVector<SDValue, 16> Ops;
for (SDValue Lane : Op->ops()) {
// For integer vectors, type legalization would have promoted the
// operands already. Otherwise, if Op is a floating-point splat
// (with operands cast to integers), then the only possibilities
// are constants and UNDEFs.
if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
APInt LowBits(EltTy.getSizeInBits(),
CstLane->getZExtValue());
Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
} else if (Lane.getNode()->isUndef()) {
Lane = DAG.getUNDEF(MVT::i32);
} else {
assert(Lane.getValueType() == MVT::i32 &&
"Unexpected BUILD_VECTOR operand type");
}
Ops.push_back(Lane);
}
return DAG.getBuildVector(VT, dl, Ops);
}
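// Illustrative example: in a v8i16 BUILD_VECTOR, a constant operand holding
// 0x12345 is truncated above to the 16-bit element width, producing the i32
// constant 0x2345 for that lane.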
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
DefBits = UndefBits;
if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
return NewOp;
DefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
return NewOp;
}
return SDValue();
}
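// Illustrative example: a v4i32 splat of 0xFFFF12FF matches none of the MOVI
// forms, but its complement 0x0000ED00 is 0xED shifted left by 8, so the
// "DefBits = ~DefBits" retry above lowers it to "mvni v0.4s, #0xed, lsl #8".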
SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (useSVEForFixedLengthVectorVT(VT)) {
if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Start = DAG.getConstant(SeqInfo->first, DL, ContainerVT);
SDValue Steps = DAG.getStepVector(DL, ContainerVT, SeqInfo->second);
SDValue Seq = DAG.getNode(ISD::ADD, DL, ContainerVT, Start, Steps);
return convertFromScalableVector(DAG, Op.getValueType(), Seq);
}
// Revert to common legalisation for all other variants.
return SDValue();
}
// Try to build a simple constant vector.
Op = NormalizeBuildVector(Op, DAG);
if (VT.isInteger()) {
// Certain vector constants, used to express things like logical NOT and
// arithmetic NEG, are passed through unmodified. This allows special
// patterns for these operations to match, which will lower these constants
// to whatever is proven necessary.
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
if (BVN->isConstant())
if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
if (Val.isZero() || Val.isAllOnes())
return Op;
}
}
if (SDValue V = ConstantBuildVector(Op, DAG))
return V;
// Scan through the operands to find some interesting properties we can
// exploit:
// 1) If only one value is used, we can use a DUP, or
// 2) if only the low element is not undef, we can just insert that, or
// 3) if only one constant value is used (w/ some non-constant lanes),
// we can splat the constant value into the whole vector then fill
// in the non-constant lanes.
// 4) FIXME: If different constant values are used, but we can intelligently
// select the values we'll be overwriting for the non-constant
// lanes such that we can directly materialize the vector
// some other way (MOVI, e.g.), we can be sneaky.
// 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
SDLoc dl(Op);
unsigned NumElts = VT.getVectorNumElements();
bool isOnlyLowElement = true;
bool usesOnlyOneValue = true;
bool usesOnlyOneConstantValue = true;
bool isConstant = true;
bool AllLanesExtractElt = true;
unsigned NumConstantLanes = 0;
unsigned NumDifferentLanes = 0;
unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
AllLanesExtractElt = false;
if (V.isUndef()) {
++NumUndefLanes;
continue;
}
if (i > 0)
isOnlyLowElement = false;
if (!isIntOrFPConstant(V))
isConstant = false;
if (isIntOrFPConstant(V)) {
++NumConstantLanes;
if (!ConstantValue.getNode())
ConstantValue = V;
else if (ConstantValue != V)
usesOnlyOneConstantValue = false;
}
if (!Value.getNode())
Value = V;
else if (V != Value) {
usesOnlyOneValue = false;
++NumDifferentLanes;
}
}
if (!Value.getNode()) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
return DAG.getUNDEF(VT);
}
// Convert BUILD_VECTOR where all elements but the lowest are undef into
// SCALAR_TO_VECTOR, except for when we have a single-element constant vector
// as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
"SCALAR_TO_VECTOR node\n");
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
}
if (AllLanesExtractElt) {
SDNode *Vector = nullptr;
bool Even = false;
bool Odd = false;
// Check whether the extract elements match the Even pattern <0,2,4,...> or
// the Odd pattern <1,3,5,...>.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
const SDNode *N = V.getNode();
if (!isa<ConstantSDNode>(N->getOperand(1)))
break;
SDValue N0 = N->getOperand(0);
// All elements are extracted from the same vector.
if (!Vector) {
Vector = N0.getNode();
// Check that the type of EXTRACT_VECTOR_ELT matches the type of
// BUILD_VECTOR.
if (VT.getVectorElementType() !=
N0.getValueType().getVectorElementType())
break;
} else if (Vector != N0.getNode()) {
Odd = false;
Even = false;
break;
}
// Extracted values are either at Even indices <0,2,4,...> or at Odd
// indices <1,3,5,...>.
uint64_t Val = N->getConstantOperandVal(1);
if (Val == 2 * i) {
Even = true;
continue;
}
if (Val - 1 == 2 * i) {
Odd = true;
continue;
}
// Something does not match: abort.
Odd = false;
Even = false;
break;
}
if (Even || Odd) {
SDValue LHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(0, dl, MVT::i64));
SDValue RHS =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
DAG.getConstant(NumElts, dl, MVT::i64));
if (Even && !Odd)
return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
RHS);
if (Odd && !Even)
return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
RHS);
}
}
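// Illustrative example: a v4i16 BUILD_VECTOR whose lanes are extracts at
// indices <0,2,4,6> from a single v8i16 source matches the Even pattern and
// becomes one UZP1 of the source's two halves.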
// Use DUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
if (usesOnlyOneValue) {
if (!isConstant) {
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Value.getValueType() != VT) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
}
// This is actually a DUPLANExx operation, which keeps everything in vector form.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
if (Value.getValueSizeInBits() == 64) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
"widening it\n");
Value = WidenVector(Value, DAG);
}
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
return DAG.getNode(Opcode, dl, VT, Value, Lane);
}
if (VT.getVectorElementType().isFloatingPoint()) {
SmallVector<SDValue, 8> Ops;
EVT EltTy = VT.getVectorElementType();
assert((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
EltTy == MVT::f64) && "Unsupported floating-point vector type");
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
"BITCASTS, and try again\n");
MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
Val.dump(););
Val = LowerBUILD_VECTOR(Val, DAG);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
}
// If we need to insert a small number of different non-constant elements and
// the vector width is sufficiently large, prefer using DUP with the common
// value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
// skip the constant lane handling below.
bool PreferDUPAndInsert =
!isConstant && NumDifferentLanes >= 1 &&
NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
NumDifferentLanes >= NumConstantLanes;
// If only one constant value was used, and it appeared in more than one lane,
// start by splatting that value, then replace the non-constant lanes. This
// is better than the default, which will perform a separate initialization
// for each lane.
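// Illustrative example: v4i32 <7, 7, 7, x> with a non-constant x splats the
// constant 7 first (a single MOVI) and then emits one INSERT_VECTOR_ELT for
// lane 3, rather than initialising all four lanes individually.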
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
Val = ConstantBuildVector(Vec, DAG);
if (!Val) {
// Otherwise, materialize the constant and splat it.
Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
}
// Now insert the non-constant lanes.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
if (!isIntOrFPConstant(V))
// Note that type legalization likely mucked about with the VT of the
// source operand, so we may have to convert it here before inserting.
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
}
return Val;
}
// This will generate a load from the constant pool.
if (isConstant) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
"expansion\n");
return SDValue();
}
// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from
// v4i32s. This is really a truncate, which we can construct out of (legal)
// concats and truncate nodes.
if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG))
return M;
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
if (SDValue shuffle = ReconstructShuffle(Op, DAG))
return shuffle;
}
if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
SmallVector<SDValue, 8> Ops(NumElts, Value);
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
if (Op.getOperand(I) != Value)
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
return NewVector;
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
// scalar_to_vector for the elements followed by a shuffle (provided the
// shuffle is valid for the target) and materialization element by element
// on the stack followed by a load for everything else.
if (!isConstant && !usesOnlyOneValue) {
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
"of INSERT_VECTOR_ELT\n");
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
unsigned i = 0;
// Use SCALAR_TO_VECTOR for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
// b) Allow the register coalescer to fold away the copy if the
// value is already in an S or D register, and we're forced to emit an
// INSERT_SUBREG that we can't fold anywhere.
//
// We also allow types like i8 and i16 which are illegal scalar but legal
// vector element types. After type-legalization the inserted value is
// extended (i32) and it is safe to cast them to the vector type by ignoring
// the upper bits of the lowest lane (e.g. v8i8, v4i16).
if (!Op0.isUndef()) {
LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
++i;
}
LLVM_DEBUG(if (i < NumElts) dbgs()
<< "Creating nodes for the other vector elements:\n";);
for (; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.isUndef())
continue;
SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
}
return Vec;
}
LLVM_DEBUG(
dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
"better alternative\n");
return SDValue();
}
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
if (isTypeLegal(Op.getOperand(0).getValueType())) {
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
if (NumOperands == 2)
return Op;
// Concat each pair of subvectors and pack into the lower half of the array.
SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
while (ConcatOps.size() > 1) {
for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
SDValue V1 = ConcatOps[I];
SDValue V2 = ConcatOps[I + 1];
EVT SubVT = V1.getValueType();
EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
ConcatOps[I / 2] =
DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
}
ConcatOps.resize(ConcatOps.size() / 2);
}
return ConcatOps[0];
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue ExtendedVector =
DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
SDValue ExtendedValue =
DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
VectorVT.getScalarType().getSizeInBits() < 32
? MVT::i32
: VectorVT.getScalarType());
ExtendedVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
ExtendedValue, Op.getOperand(2));
return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
}
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform insertion by expanding the value
// to a V128 type and perform the insertion on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
Op.getOperand(1), Op.getOperand(2));
// Re-narrow the resultant vector.
return NarrowVector(Node, DAG);
}
SDValue
AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
// We can't directly extract from an SVE predicate; extend it first.
// (This isn't the only possible lowering, but it's straightforward.)
EVT VectorVT = getPromotedVTForPredicate(VT);
SDLoc DL(Op);
SDValue Extend =
DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
Extend, Op.getOperand(1));
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
if (useSVEForFixedLengthVectorVT(VT))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
// Insertion/extraction are legal for V128 types.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
VT == MVT::v8f16 || VT == MVT::v8bf16)
return Op;
if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
VT != MVT::v4bf16)
return SDValue();
// For V64 types, we perform extraction by expanding the value
// to a V128 type and perform the extraction on that.
SDLoc DL(Op);
SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
EVT WideTy = WideVec.getValueType();
EVT ExtrTy = WideTy.getVectorElementType();
if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
ExtrTy = MVT::i32;
// For extractions, we just return the result directly.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isFixedLengthVector() &&
"Only cases that extract a fixed length vector are supported!");
EVT InVT = Op.getOperand(0).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
// If we don't have legal types yet, do nothing.
if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
return SDValue();
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
return Op;
return SDValue();
}
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
InVT.getSizeInBits() == 128)
return Op;
if (useSVEForFixedLengthVectorVT(InVT)) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue NewInVec =
convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
return convertFromScalableVector(DAG, Op.getValueType(), Splice);
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isScalableVector() &&
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
SDValue Vec0 = Op.getOperand(0);
SDValue Vec1 = Op.getOperand(1);
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (InVT.isScalableVector()) {
if (!isTypeLegal(VT))
return SDValue();
// Break down insert_subvector into simpler parts.
if (VT.getVectorElementType() == MVT::i1) {
unsigned NumElts = VT.getVectorMinNumElements();
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Lo, Hi;
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
DAG.getVectorIdxConstant(0, DL));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
DAG.getVectorIdxConstant(NumElts / 2, DL));
if (Idx < (NumElts / 2)) {
SDValue NewLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Lo, Vec1,
DAG.getVectorIdxConstant(Idx, DL));
return DAG.getNode(AArch64ISD::UZP1, DL, VT, NewLo, Hi);
} else {
SDValue NewHi =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Hi, Vec1,
DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
return DAG.getNode(AArch64ISD::UZP1, DL, VT, Lo, NewHi);
}
}
// Ensure the subvector is half the size of the main vector.
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
// Here narrow and wide refer to the vector element types. After "casting"
// both vectors must have the same bit length, so because the subvector has
// fewer elements, those elements need to be bigger.
EVT NarrowVT = getPackedSVEVectorVT(VT.getVectorElementCount());
EVT WideVT = getPackedSVEVectorVT(InVT.getVectorElementCount());
// NOP cast operands to the largest legal vector of the same element count.
if (VT.isFloatingPoint()) {
Vec0 = getSVESafeBitCast(NarrowVT, Vec0, DAG);
Vec1 = getSVESafeBitCast(WideVT, Vec1, DAG);
} else {
// Legal integer vectors are already their largest so Vec0 is fine as is.
Vec1 = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
}
// To replace the top/bottom half of vector V with vector SubV we widen the
// preserved half of V, concatenate this to SubV (the order depending on the
// half being replaced) and then narrow the result.
SDValue Narrow;
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, Vec1, HiVec0);
} else {
assert(Idx == InVT.getVectorMinNumElements() &&
"Invalid subvector index!");
SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
Narrow = DAG.getNode(AArch64ISD::UZP1, DL, NarrowVT, LoVec0, Vec1);
}
return getSVESafeBitCast(VT, Narrow, DAG);
}
if (Idx == 0 && isPackedVectorType(VT, DAG)) {
// This will be matched by custom code during ISelDAGToDAG.
if (Vec0.isUndef())
return Op;
Optional<unsigned> PredPattern =
getSVEPredPatternFromNumElements(InVT.getVectorNumElements());
auto PredTy = VT.changeVectorElementType(MVT::i1);
SDValue PTrue = getPTrue(DAG, DL, PredTy, *PredPattern);
SDValue ScalableVec1 = convertToScalableVector(DAG, VT, Vec1);
return DAG.getNode(ISD::VSELECT, DL, VT, PTrue, ScalableVec1, Vec0);
}
return SDValue();
}
static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
if (Op.getOpcode() != AArch64ISD::DUP &&
Op.getOpcode() != ISD::SPLAT_VECTOR &&
Op.getOpcode() != ISD::BUILD_VECTOR)
return false;
if (Op.getOpcode() == ISD::BUILD_VECTOR &&
!isAllConstantBuildVector(Op, SplatVal))
return false;
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(Op->getOperand(0)))
return false;
SplatVal = Op->getConstantOperandVal(0);
if (Op.getValueType().getVectorElementType() != MVT::i64)
SplatVal = (int32_t)SplatVal;
Negated = false;
if (isPowerOf2_64(SplatVal))
return true;
Negated = true;
if (isPowerOf2_64(-SplatVal)) {
SplatVal = -SplatVal;
return true;
}
return false;
}
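// Illustrative example: a splat of -8 sets Negated = true and SplatVal = 8,
// so a signed divide by the splat can still use the shift-based lowering in
// LowerDIV, followed by a negation of the result.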
SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc dl(Op);
if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
assert(VT.isScalableVector() && "Expected a scalable vector.");
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
bool Negated;
uint64_t SplatVal;
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
SDValue Res =
DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
if (Negated)
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
return Res;
}
if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
// operations, and truncate the result.
EVT WidenedVT;
if (VT == MVT::nxv16i8)
WidenedVT = MVT::nxv8i16;
else if (VT == MVT::nxv8i16)
WidenedVT = MVT::nxv4i32;
else
llvm_unreachable("Unexpected Custom DIV operation");
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
}
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
return false;
if (VT.getVectorNumElements() == 4 &&
(VT.is128BitVector() || VT.is64BitVector())) {
unsigned Cost = getPerfectShuffleCost(M);
if (Cost <= 1)
return true;
}
bool DummyBool;
int DummyInt;
unsigned DummyUnsigned;
return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
// isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
isZIPMask(M, VT, DummyUnsigned) ||
isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
isConcatMask(M, VT, VT.getSizeInBits() == 128));
}
bool AArch64TargetLowering::isVectorClearMaskLegal(ArrayRef<int> M,
EVT VT) const {
// Just delegate to the generic legality, clear masks aren't special.
return isShuffleMaskLegal(M, VT);
}
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
// Ignore bit_converts.
while (Op.getOpcode() == ISD::BITCAST)
Op = Op.getOperand(0);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
HasAnyUndefs, ElementBits) ||
SplatBitSize > ElementBits)
return false;
Cnt = SplatBits.getSExtValue();
return true;
}
/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation. That value must be in the range:
/// 0 <= Value < ElementBits for a left shift; or
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
}
/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift; or
/// 1 <= Value <= ElementBits/2 for a narrowing right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
}
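// For instance, for v4i32 a right-shift amount splat of 32 is accepted
// (1 <= 32 <= ElementBits) while 0 or 33 is rejected; a narrowing right
// shift only accepts amounts in the range 1..16.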
SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getScalarType() == MVT::i1) {
// Lower i1 truncate to `(x & 1) != 0`.
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
SDValue Zero = DAG.getConstant(0, dl, OpVT);
SDValue One = DAG.getConstant(1, dl, OpVT);
SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
}
if (!VT.isVector() || VT.isScalableVector())
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
}
SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
int64_t Cnt;
if (!Op.getOperand(1).getValueType().isVector())
return Op;
unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
MVT::i32),
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
}
// Right shift immediate
if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
unsigned Opc =
(Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
DAG.getConstant(Cnt, DL, MVT::i32));
}
// Right shift register. Note, there is not a shift right register
// instruction, but the shift left register instruction takes a signed
// value, where negative numbers specify a right shift.
unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
: Intrinsic::aarch64_neon_ushl;
// Negate the shift amount.
SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
Op.getOperand(1));
SDValue NegShiftLeft =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
NegShift);
return NegShiftLeft;
}
llvm_unreachable("unexpected shift opcode");
}
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
AArch64CC::CondCode CC, bool NoNans, EVT VT,
const SDLoc &dl, SelectionDAG &DAG) {
EVT SrcVT = LHS.getValueType();
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
"function only supposed to emit natural comparisons");
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
APInt CnstBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
bool IsZero = IsCnst && (CnstBits == 0);
if (SrcVT.getVectorElementType().isFloatingPoint()) {
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Fcmeq;
if (IsZero)
Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
else
Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Fcmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the LS implementation.
LLVM_FALLTHROUGH;
case AArch64CC::LS:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (!NoNans)
return SDValue();
// If we ignore NaNs then we can use the MI implementation.
LLVM_FALLTHROUGH;
case AArch64CC::MI:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
}
}
switch (CC) {
default:
return SDValue();
case AArch64CC::NE: {
SDValue Cmeq;
if (IsZero)
Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
else
Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
return DAG.getNOT(dl, Cmeq, VT);
}
case AArch64CC::EQ:
if (IsZero)
return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
case AArch64CC::GE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
case AArch64CC::GT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
case AArch64CC::LE:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
case AArch64CC::LS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
case AArch64CC::LO:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
case AArch64CC::LT:
if (IsZero)
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
case AArch64CC::HI:
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
case AArch64CC::HS:
return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
}
}
SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
SelectionDAG &DAG) const {
if (Op.getValueType().isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
SDLoc dl(Op);
if (LHS.getValueType().getVectorElementType().isInteger()) {
assert(LHS.getValueType() == RHS.getValueType());
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
SDValue Cmp =
EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
// Make v4f16 (only) fcmp operations utilise vector instructions;
// v8f16 support will be a little more complicated.
if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
if (LHS.getValueType().getVectorNumElements() == 4) {
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
DAG.ReplaceAllUsesWith(Op, NewSetcc);
CmpVT = MVT::v4i32;
} else
return SDValue();
}
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
LHS.getValueType().getVectorElementType() != MVT::f128);
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
// clean. Some of them require two branches to implement.
AArch64CC::CondCode CC1, CC2;
bool ShouldInvert;
changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
SDValue Cmp =
EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
if (!Cmp.getNode())
return SDValue();
if (CC2 != AArch64CC::AL) {
SDValue Cmp2 =
EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
if (!Cmp2.getNode())
return SDValue();
Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
}
Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
if (ShouldInvert)
Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
return Cmp;
}
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
SelectionDAG &DAG) {
SDValue VecOp = ScalarOp.getOperand(0);
auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
DAG.getConstant(0, DL, MVT::i64));
}
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
Op.getOpcode() == ISD::VECREDUCE_FADD ||
(Op.getOpcode() != ISD::VECREDUCE_ADD &&
SrcVT.getVectorElementType() == MVT::i64);
if (SrcVT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
SrcVT, OverrideNEON && Subtarget->useSVEForFixedLengthVectors())) {
if (SrcVT.getVectorElementType() == MVT::i1)
return LowerPredReductionToSVE(Op, DAG);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
case ISD::VECREDUCE_AND:
return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
case ISD::VECREDUCE_OR:
return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
case ISD::VECREDUCE_SMAX:
return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_SMIN:
return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
case ISD::VECREDUCE_UMAX:
return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
case ISD::VECREDUCE_UMIN:
return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
case ISD::VECREDUCE_XOR:
return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
case ISD::VECREDUCE_FADD:
return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
case ISD::VECREDUCE_FMAX:
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
}
// Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:
return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
case ISD::VECREDUCE_SMIN:
return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
case ISD::VECREDUCE_UMAX:
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
case ISD::VECREDUCE_FMAX: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
Src);
}
case ISD::VECREDUCE_FMIN: {
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
Src);
}
default:
llvm_unreachable("Unhandled reduction");
}
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-add instruction, but not a load-sub.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
Op.getOperand(0), Op.getOperand(1), RHS,
AN->getMemOperand());
}
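// Illustrative example: "atomicrmw and ptr, 0x0F" becomes an LDCLR whose
// operand is the complement ~0x0F; clearing every bit outside the mask is
// equivalent to ANDing with the mask.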
SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
if (Subtarget->hasCustomCallingConv())
TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
Chain =
DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
DAG.getRegisterMask(Mask), Chain.getValue(1));
// To match the actual intent better, we should read the output from X15 here
// again (instead of potentially spilling it to the stack), but rereading Size
// from X15 here doesn't work at -O0, since it thinks that X15 is undefined
// here.
Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
DAG.getConstant(4, dl, MVT::i64));
return Chain;
}
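// Illustrative example: a 64-byte allocation passes X15 = 4 to __chkstk (the
// SRL by 4 above divides the byte size by 16), and the SHL by 4 afterwards
// restores the byte count for the stack adjustment.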
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetWindows() &&
"Only Windows alloca probing supported");
SDLoc dl(Op);
// Get the inputs.
SDNode *Node = Op.getNode();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
MaybeAlign Align =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
EVT VT = Node->getValueType(0);
if (DAG.getMachineFunction().getFunction().hasFnAttribute(
"no-stack-arg-probe")) {
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
Chain = SP.getValue(1);
SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
if (Align)
SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
SDValue Ops[2] = {SP, Chain};
return DAG.getMergeValues(Ops, dl);
}
SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT != MVT::i64 && "Expected illegal VSCALE node");
SDLoc DL(Op);
APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sext(64)), DL,
VT);
}
/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
template <unsigned NumVecs>
static bool
setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
Info.opc = ISD::INTRINSIC_VOID;
// Retrieve EC from first vector argument.
const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
ElementCount EC = VT.getVectorElementCount();
#ifndef NDEBUG
// Check the assumption that all input vectors are the same type.
for (unsigned I = 0; I < NumVecs; ++I)
assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
"Invalid type.");
#endif
// memVT is `NumVecs * VT`.
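// E.g. an st2 of two nxv4i32 values yields memVT == nxv8i32.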
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
return true;
}
/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
case Intrinsic::aarch64_sve_st2:
return setInfoSVEStN<2>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st3:
return setInfoSVEStN<3>(*this, DL, Info, I);
case Intrinsic::aarch64_sve_st4:
return setInfoSVEStN<4>(*this, DL, Info, I);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
}
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane: {
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
for (const Value *Arg : I.args()) {
Type *ArgTy = Arg->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
}
case Intrinsic::aarch64_ldaxr:
case Intrinsic::aarch64_ldxr: {
Type *ValTy = I.getParamElementType(0);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
Type *ValTy = I.getParamElementType(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(ValTy);
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ValTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::aarch64_ldaxp:
case Intrinsic::aarch64_ldxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_stlxp:
case Intrinsic::aarch64_stxp:
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i128;
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = Align(16);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
return true;
case Intrinsic::aarch64_sve_ldnt1: {
Type *ElTy = cast<VectorType>(I.getType())->getElementType();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getType());
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ElTy);
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
Type *ElTy =
cast<VectorType>(I.getArgOperand(0)->getType())->getElementType();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(I.getOperand(0)->getType());
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(ElTy);
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_mops_memset_tag: {
Value *Dst = I.getArgOperand(0);
Value *Val = I.getArgOperand(1);
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(Val->getType());
Info.ptrVal = Dst;
Info.offset = 0;
Info.align = I.getParamAlign(0).valueOrOne();
Info.flags = MachineMemOperand::MOStore;
// The size of the memory being operated on is unknown at this point
Info.size = MemoryLocation::UnknownSize;
return true;
}
default:
break;
}
return false;
}
bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
ISD::LoadExtType ExtTy,
EVT NewVT) const {
// TODO: This may be worth removing. Check regression tests for diffs.
if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
return false;
// If we're reducing the load width in order to avoid having to use an extra
// instruction to do extension then it's probably a good idea.
if (ExtTy != ISD::NON_EXTLOAD)
return true;
// Don't reduce load width if it would prevent us from combining a shift into
// the offset.
MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
assert(Mem);
const SDValue &Base = Mem->getBasePtr();
if (Base.getOpcode() == ISD::ADD &&
Base.getOperand(1).getOpcode() == ISD::SHL &&
Base.getOperand(1).hasOneUse() &&
Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
// It's unknown whether a scalable vector has a power-of-2 bitwidth.
if (Mem->getMemoryVT().isScalableVector())
return false;
// The shift can be combined if it matches the size of the value being
// loaded (and so reducing the width would make it not match).
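// E.g. for an i64 load, (add x, (shl y, 3)) folds into the scaled addressing
// mode "ldr xN, [x, y, lsl #3]"; narrowing the load would break that match.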
uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
if (ShiftAmount == Log2_32(LoadBytes))
return false;
}
// We have no reason to disallow reducing the load width, so allow it.
return true;
}
// Truncations from 64-bit GPRs to 32-bit GPRs are free.
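// E.g. an i64 -> i32 truncate just reads the W view of the X register, so no
// instruction is needed.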
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
return NumBits1 > NumBits2;
}
bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
uint64_t NumBits1 = VT1.getFixedSizeInBits();
uint64_t NumBits2 = VT2.getFixedSizeInBits();
return NumBits1 > NumBits2;
}
/// Check if it is profitable to hoist an instruction in then/else into if.
/// Not profitable if I and its user can form an FMA instruction,
/// because we prefer FMSUB/FMADD.
bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
if (I->getOpcode() != Instruction::FMul)
return true;
if (!I->hasOneUse())
return true;
Instruction *User = I->user_back();
if (!(User->getOpcode() == Instruction::FSub ||
User->getOpcode() == Instruction::FAdd))
return true;
const TargetOptions &Options = getTargetMachine().Options;
const Function *F = I->getFunction();
const DataLayout &DL = F->getParent()->getDataLayout();
Type *Ty = User->getOperand(0)->getType();
return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
(Options.AllowFPOpFusion == FPOpFusion::Fast ||
Options.UnsafeFPMath));
}
// All 32-bit GPR operations implicitly zero the high-half of the corresponding
// 64-bit GPR.
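// E.g. "add w0, w1, w2" also clears bits [63:32] of x0, so a following
// zero-extend to i64 costs nothing.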
bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
return false;
unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
return false;
unsigned NumBits1 = VT1.getSizeInBits();
unsigned NumBits2 = VT2.getSizeInBits();
return NumBits1 == 32 && NumBits2 == 64;
}
bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
EVT VT1 = Val.getValueType();
if (isZExtFree(VT1, VT2)) {
return true;
}
if (Val.getOpcode() != ISD::LOAD)
return false;
// 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
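// E.g. "ldrb w0, [x1]" leaves bits [63:8] of x0 zero, making the zext a no-op.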
return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
VT1.getSizeInBits() <= 32);
}
bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
if (isa<FPExtInst>(Ext))
return false;
// Vector types are not free.
if (Ext->getType()->isVectorTy())
return false;
for (const Use &U : Ext->uses()) {
// The extension is free if we can fold it with a left shift in an
// addressing mode or an arithmetic operation: add, sub, and cmp.
// Is there a shift?
const Instruction *Instr = cast<Instruction>(U.getUser());
// Is this a constant shift?
switch (Instr->getOpcode()) {
case Instruction::Shl:
if (!isa<ConstantInt>(Instr->getOperand(1)))
return false;
break;
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
std::advance(GTI, U.getOperandNo()-1);
Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
uint64_t ShiftAmt =
countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
if (ShiftAmt == 0 || ShiftAmt > 4)
return false;
break;
}
case Instruction::Trunc:
// Check if this is a noop.
// trunc(sext ty1 to ty2) to ty1.
if (Instr->getType() == Ext->getOperand(0)->getType())
continue;
LLVM_FALLTHROUGH;
default:
return false;
}
// At this point we can use the bfm family, so this extension is free
// for that use.
}
return true;
}
/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
/// or upper half of the vector elements.
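/// E.g. both operands being half-width extracts at the same offset, such as
/// mask <8, 9, 10, 11, 12, 13, 14, 15> applied to two v16i8 sources.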
static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
auto areTypesHalved = [](Value *FullV, Value *HalfV) {
auto *FullTy = FullV->getType();
auto *HalfTy = HalfV->getType();
return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
auto *FullVT = cast<FixedVectorType>(FullV->getType());
auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
ArrayRef<int> M1, M2;
Value *S1Op1, *S2Op1;
if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
!match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
return false;
// Check that the operands are half as wide as the result and we extract
// half of the elements of the input vectors.
if (!areTypesHalved(S1Op1, Op1) || !areTypesHalved(S2Op1, Op2) ||
!extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
return false;
// Check the mask extracts either the lower or upper half of vector
// elements.
int M1Start = -1;
int M2Start = -1;
int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
return false;
return true;
}
/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
/// of the vector elements.
static bool areExtractExts(Value *Ext1, Value *Ext2) {
auto areExtDoubled = [](Instruction *Ext) {
return Ext->getType()->getScalarSizeInBits() ==
2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
};
if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
!match(Ext2, m_ZExtOrSExt(m_Value())) ||
!areExtDoubled(cast<Instruction>(Ext1)) ||
!areExtDoubled(cast<Instruction>(Ext2)))
return false;
return true;
}
/// Check if Op could be used with vmull_high_p64 intrinsic.
static bool isOperandOfVmullHighP64(Value *Op) {
Value *VectorOperand = nullptr;
ConstantInt *ElementIndex = nullptr;
return match(Op, m_ExtractElt(m_Value(VectorOperand),
m_ConstantInt(ElementIndex))) &&
ElementIndex->getValue() == 1 &&
isa<FixedVectorType>(VectorOperand->getType()) &&
cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
}
/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
}
static bool isSplatShuffle(Value *V) {
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V))
return is_splat(Shuf->getShuffleMask());
return false;
}
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// shufflevector extracts and/or sext/zext can be folded into (u,s)subl(2).
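/// E.g. sinking two sext(extract_hi(...)) operands next to an add allows it
/// to select to a single saddl2 instruction.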
bool AArch64TargetLowering::shouldSinkOperands(
Instruction *I, SmallVectorImpl<Use *> &Ops) const {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull:
if (areExtractShuffleVectors(II->getOperand(0), II->getOperand(1))) {
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
}
LLVM_FALLTHROUGH;
case Intrinsic::fma:
if (isa<VectorType>(I->getType()) &&
cast<VectorType>(I->getType())->getElementType()->isHalfTy() &&
!Subtarget->hasFullFP16())
return false;
LLVM_FALLTHROUGH;
case Intrinsic::aarch64_neon_sqdmull:
case Intrinsic::aarch64_neon_sqdmulh:
case Intrinsic::aarch64_neon_sqrdmulh:
// Sink splats for index lane variants
if (isSplatShuffle(II->getOperand(0)))
Ops.push_back(&II->getOperandUse(0));
if (isSplatShuffle(II->getOperand(1)))
Ops.push_back(&II->getOperandUse(1));
return !Ops.empty();
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
if (auto *IIOp = dyn_cast<IntrinsicInst>(II->getOperand(0)))
if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
Ops.push_back(&II->getOperandUse(0));
return !Ops.empty();
case Intrinsic::aarch64_sme_write_horiz:
case Intrinsic::aarch64_sme_write_vert:
case Intrinsic::aarch64_sme_writeq_horiz:
case Intrinsic::aarch64_sme_writeq_vert: {
auto *Idx = dyn_cast<Instruction>(II->getOperand(1));
if (!Idx || Idx->getOpcode() != Instruction::Add)
return false;
Ops.push_back(&II->getOperandUse(1));
return true;
}
case Intrinsic::aarch64_sme_read_horiz:
case Intrinsic::aarch64_sme_read_vert:
case Intrinsic::aarch64_sme_readq_horiz:
case Intrinsic::aarch64_sme_readq_vert:
case Intrinsic::aarch64_sme_ld1b_vert:
case Intrinsic::aarch64_sme_ld1h_vert:
case Intrinsic::aarch64_sme_ld1w_vert:
case Intrinsic::aarch64_sme_ld1d_vert:
case Intrinsic::aarch64_sme_ld1q_vert:
case Intrinsic::aarch64_sme_st1b_vert:
case Intrinsic::aarch64_sme_st1h_vert:
case Intrinsic::aarch64_sme_st1w_vert:
case Intrinsic::aarch64_sme_st1d_vert:
case Intrinsic::aarch64_sme_st1q_vert:
case Intrinsic::aarch64_sme_ld1b_horiz:
case Intrinsic::aarch64_sme_ld1h_horiz:
case Intrinsic::aarch64_sme_ld1w_horiz:
case Intrinsic::aarch64_sme_ld1d_horiz:
case Intrinsic::aarch64_sme_ld1q_horiz:
case Intrinsic::aarch64_sme_st1b_horiz:
case Intrinsic::aarch64_sme_st1h_horiz:
case Intrinsic::aarch64_sme_st1w_horiz:
case Intrinsic::aarch64_sme_st1d_horiz:
case Intrinsic::aarch64_sme_st1q_horiz: {
auto *Idx = dyn_cast<Instruction>(II->getOperand(3));
if (!Idx || Idx->getOpcode() != Instruction::Add)
return false;
Ops.push_back(&II->getOperandUse(3));
return true;
}
case Intrinsic::aarch64_neon_pmull:
if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
return false;
Ops.push_back(&II->getOperandUse(0));
Ops.push_back(&II->getOperandUse(1));
return true;
case Intrinsic::aarch64_neon_pmull64:
if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
II->getArgOperand(1)))
return false;
Ops.push_back(&II->getArgOperandUse(0));
Ops.push_back(&II->getArgOperandUse(1));
return true;
default:
return false;
}
}
if (!I->getType()->isVectorTy())
return false;
switch (I->getOpcode()) {
case Instruction::Sub:
case Instruction::Add: {
if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
return false;
// If the exts' operands extract either the lower or upper elements, we
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
Ops.push_back(&I->getOperandUse(0));
Ops.push_back(&I->getOperandUse(1));
return true;
}
case Instruction::Mul: {
bool IsProfitable = false;
for (auto &Op : I->operands()) {
// Make sure we are not already sinking this operand
if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
continue;
ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
if (!Shuffle || !Shuffle->isZeroEltSplat())
continue;
Value *ShuffleOperand = Shuffle->getOperand(0);
InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
if (!Insert)
continue;
Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
if (!OperandInstr)
continue;
ConstantInt *ElementConstant =
dyn_cast<ConstantInt>(Insert->getOperand(2));
// Check that the insertelement is inserting into element 0
if (!ElementConstant || ElementConstant->getZExtValue() != 0)
continue;
unsigned Opcode = OperandInstr->getOpcode();
if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
continue;
Ops.push_back(&Shuffle->getOperandUse(0));
Ops.push_back(&Op);
IsProfitable = true;
}
return IsProfitable;
}
default:
return false;
}
return false;
}
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
Align &RequiredAlignment) const {
if (!LoadedType.isSimple() ||
(!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
return false;
// Cyclone supports unaligned accesses.
RequiredAlignment = Align(1);
unsigned NumBits = LoadedType.getSizeInBits();
return NumBits == 32 || NumBits == 64;
}
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
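/// E.g. a 256-bit fixed vector accessed via NEON (128-bit registers) counts
/// as two interleaved accesses.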
unsigned AArch64TargetLowering::getNumInterleavedAccesses(
VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
unsigned VecSize = UseScalable ? Subtarget->getMinSVEVectorSizeInBits() : 128;
return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
}
MachineMemOperand::Flags
AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
return MOStridedAccess;
return MachineMemOperand::MONone;
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
UseScalable = false;
// Ensure the number of vector elements is greater than 1.
if (NumElements < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
if (Subtarget->useSVEForFixedLengthVectors() &&
(VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
(VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
isPowerOf2_32(NumElements) && VecSize > 128))) {
UseScalable = true;
return true;
}
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 2);
if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 4);
if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 2);
if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 4);
if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 8);
if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
return ScalableVectorType::get(VTy->getElementType(), 16);
llvm_unreachable("Cannot handle input vector type");
}
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
///
/// Into:
/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool AArch64TargetLowering::lowerInterleavedLoad(
LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
assert(!Shuffles.empty() && "Empty shufflevector input");
assert(Shuffles.size() == Indices.size() &&
"Unmatched number of shufflevectors and indices");
const DataLayout &DL = LI->getModule()->getDataLayout();
VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector sizes are divisible by 128.
bool UseScalable;
if (!Subtarget->hasNEON() ||
!isLegalInterleavedAccessType(VTy, DL, UseScalable))
return false;
unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector cannot be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
FVTy = FixedVectorType::get(FVTy->getElementType(),
FVTy->getNumElements() / NumLoads);
auto *LDVTy =
UseScalable ? cast<VectorType>(getSVEContainerIRType(FVTy)) : FVTy;
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
Type *PtrTy =
UseScalable
? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
: LDVTy->getPointerTo(LI->getPointerAddressSpace());
Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
LDVTy->getElementCount());
static const Intrinsic::ID SVELoadIntrs[3] = {
Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
Intrinsic::aarch64_sve_ld4_sret};
static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
Function *LdNFunc;
if (UseScalable)
LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
SVELoadIntrs[Factor - 2], {LDVTy});
else
LdNFunc = Intrinsic::getDeclaration(
LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
Value *PTrue = nullptr;
if (UseScalable) {
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(FVTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
CallInst *LdN;
if (UseScalable)
LdN = Builder.CreateCall(
LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
else
LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
"ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
ShuffleVectorInst *SVI = Shuffles[i];
unsigned Index = Indices[i];
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
if (UseScalable)
SubVec = Builder.CreateExtractVector(
FVTy, SubVec,
ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
// Replace uses of the shufflevector instructions with the sub-vectors
// returned by the load intrinsic. If a shufflevector instruction is
// associated with more than one sub-vector, those sub-vectors will be
// concatenated into a single wide vector.
for (ShuffleVectorInst *SVI : Shuffles) {
auto &SubVec = SubVecs[SVI];
auto *WideVec =
SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
SVI->replaceAllUsesWith(WideVec);
}
return true;
}
/// Lower an interleaved store into a stN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
///
/// Example for a more general valid mask (Factor 3). Lower:
/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
Type *EltTy = VecTy->getElementType();
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
bool UseScalable;
// Skip if we do not have NEON, and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector sizes are divisible by 128.
if (!Subtarget->hasNEON() ||
!isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return false;
unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
IRBuilder<> Builder(SI);
// StN intrinsics don't support pointer vectors as arguments. Convert pointer
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
unsigned NumOpElts =
cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
// If we're going to generate more than one store, reset the lane length
// and sub-vector type to something legal.
LaneLen /= NumStores;
SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
auto *STVTy = UseScalable ? cast<VectorType>(getSVEContainerIRType(SubVecTy))
: SubVecTy;
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
}
auto Mask = SVI->getShuffleMask();
Type *PtrTy =
UseScalable
? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
: STVTy->getPointerTo(SI->getPointerAddressSpace());
Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
STVTy->getElementCount());
static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
Intrinsic::aarch64_sve_st3,
Intrinsic::aarch64_sve_st4};
static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
Intrinsic::aarch64_neon_st3,
Intrinsic::aarch64_neon_st4};
Function *StNFunc;
if (UseScalable)
StNFunc = Intrinsic::getDeclaration(SI->getModule(),
SVEStoreIntrs[Factor - 2], {STVTy});
else
StNFunc = Intrinsic::getDeclaration(
SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
Value *PTrue = nullptr;
if (UseScalable) {
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
if (Subtarget->getMinSVEVectorSizeInBits() ==
Subtarget->getMaxSVEVectorSizeInBits() &&
Subtarget->getMinSVEVectorSizeInBits() ==
DL.getTypeSizeInBits(SubVecTy))
PgPattern = AArch64SVEPredPattern::all;
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), *PgPattern);
PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
{PTruePat});
}
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
SmallVector<Value *, 5> Ops;
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
Value *Shuffle;
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
Shuffle = Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
unsigned IdxJ = StoreCount * LaneLen * Factor + j;
if (Mask[IdxJ * Factor + IdxI] >= 0) {
StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
break;
}
}
// Note: Filling undef gaps with random elements is ok, since
// those elements were being written anyway (with undefs).
// In the case of all undefs we default to using elements from 0.
// Note: StartMask cannot be negative; it's checked in
// isReInterleaveMask.
Shuffle = Builder.CreateShuffleVector(
Op0, Op1, createSequentialMask(StartMask, LaneLen, 0));
}
if (UseScalable)
Shuffle = Builder.CreateInsertVector(
STVTy, UndefValue::get(STVTy), Shuffle,
ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
Ops.push_back(Shuffle);
}
if (UseScalable)
Ops.push_back(PTrue);
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
BaseAddr, LaneLen * Factor);
Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
Builder.CreateCall(StNFunc, Ops);
}
return true;
}
// Lower an SVE structured load intrinsic returning a tuple type to target
// specific intrinsic taking the same input but returning a multi-result value
// of the split tuple type.
//
// E.g. Lowering an LD3:
//
// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
// <vscale x 4 x i1> %pred,
// <vscale x 4 x i32>* %addr)
//
// Output DAG:
//
// t0: ch = EntryToken
// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
// t4: i64,ch = CopyFromReg t0, Register:i64 %1
// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
//
// This is called pre-legalization to avoid widening/splitting issues with
// non-power-of-2 tuple types used for LD3, such as nxv12i32.
SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG,
const SDLoc &DL) const {
assert(VT.isScalableVector() && "Can only lower scalable vectors");
unsigned N, Opcode;
static const std::pair<unsigned, std::pair<unsigned, unsigned>>
IntrinsicMap[] = {
{Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
{Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
std::tie(N, Opcode) = llvm::find_if(IntrinsicMap, [&](auto P) {
return P.first == Intrinsic;
})->second;
assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
"invalid tuple vector type!");
EVT SplitVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
VT.getVectorElementCount().divideCoefficientBy(N));
assert(isTypeLegal(SplitVT));
SmallVector<EVT, 5> VTs(N, SplitVT);
VTs.push_back(MVT::Other); // Chain
SDVTList NodeTys = DAG.getVTList(VTs);
SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
SmallVector<SDValue, 4> PseudoLoadOps;
for (unsigned I = 0; I < N; ++I)
PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
}
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
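// E.g. a 16-byte zero memset is better served by "stp xzr, xzr, [x0]" than by
// materializing a v2i64 zero and doing a vector store (a sketch of the
// trade-off).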
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v16i8, Align(16)))
return MVT::v16i8;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return MVT::i64;
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return MVT::i32;
return MVT::Other;
}
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
// taken one instruction to materialize the v2i64 zero and one store (with
// restrictive addressing mode). Just do i64 stores.
bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
if (Op.isAligned(AlignCheck))
return true;
bool Fast;
return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
MachineMemOperand::MONone, &Fast) &&
Fast;
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
AlignmentIsAcceptable(MVT::v2i64, Align(16)))
return LLT::fixed_vector(2, 64);
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return LLT::scalar(128);
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
return LLT::scalar(64);
if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
return LLT::scalar(32);
return LLT();
}
// 12-bit optionally shifted immediates are legal for adds.
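// E.g. 0xfff and 0xfff000 are legal add immediates, while 0x1001 is not and
// must be materialized separately.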
bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
if (Immed == std::numeric_limits<int64_t>::min()) {
LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
<< ": avoid UB for INT64_MIN\n");
return false;
}
// Same encoding for add/sub, just flip the sign.
Immed = std::abs(Immed);
bool IsLegal = ((Immed >> 12) == 0 ||
((Immed & 0xfff) == 0 && Immed >> 24 == 0));
LLVM_DEBUG(dbgs() << "Is " << Immed
<< " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
return IsLegal;
}
// Return false to prevent folding
// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
// if the folding leads to worse code.
bool AArch64TargetLowering::isMulAddWithConstProfitable(
SDValue AddNode, SDValue ConstNode) const {
// Let the DAGCombiner decide for vector types and large types.
const EVT VT = AddNode.getValueType();
if (VT.isVector() || VT.getScalarSizeInBits() > 64)
return true;
// It is worse if c1 is a legal add immediate while c1*c2 is not, since c1*c2
// then has to be materialized with at least two instructions.
const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
const int64_t C1 = C1Node->getSExtValue();
const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
return true;
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
if (Insn.size() > 1)
return false;
// Default to true and let the DAGCombiner decide.
return true;
}
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
return isLegalAddImmediate(Immed);
}
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
// reg + SIZE_IN_BYTES * 12-bit unsigned offset
// reg1 + reg2
// reg + SIZE_IN_BYTES * reg
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
// No reg+reg+imm addressing.
if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
return false;
// FIXME: Update this method to support scalable addressing modes.
if (isa<ScalableVectorType>(Ty)) {
uint64_t VecElemNumBytes =
DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
return AM.HasBaseReg && !AM.BaseOffs &&
(AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
}
// check reg + imm case:
// i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
uint64_t NumBytes = 0;
if (Ty->isSized()) {
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
NumBytes = NumBits / 8;
if (!isPowerOf2_64(NumBits))
NumBytes = 0;
}
if (!AM.Scale) {
int64_t Offset = AM.BaseOffs;
// 9-bit signed offset
if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
unsigned shift = Log2_64(NumBytes);
if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
// Must be a multiple of NumBytes (NumBytes is a power of 2)
(Offset >> shift) << shift == Offset)
return true;
return false;
}
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
// Consider splitting large offset of struct or array.
return true;
}
InstructionCost AArch64TargetLowering::getScalingFactorCost(
const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
// Scaling factors are not free at all.
// Operands | Rt Latency
// -------------------------------------------
// Rt, [Xn, Xm] | 4
// -------------------------------------------
// Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
// Rt, [Xn, Wm, <extend> #imm] |
if (isLegalAddressingMode(DL, AM, Ty, AS))
// Scale represents reg2 * scale, thus account for 1 if
// it is not equal to 0 or 1.
return AM.Scale != 0 && AM.Scale != 1;
return -1;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
const MachineFunction &MF, EVT VT) const {
VT = VT.getScalarType();
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f16:
return Subtarget->hasFullFP16();
case MVT::f32:
case MVT::f64:
return true;
default:
break;
}
return false;
}
bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
Type *Ty) const {
switch (Ty->getScalarType()->getTypeID()) {
case Type::FloatTyID:
case Type::DoubleTyID:
return true;
default:
return false;
}
}
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
!useSVEForFixedLengthVectorVT(VT);
}
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
// site. Hence we include LR in the scratch registers, which are in turn added
// as implicit-defs for stackmaps and patchpoints.
static const MCPhysReg ScratchRegs[] = {
AArch64::X16, AArch64::X17, AArch64::LR, 0
};
return ScratchRegs;
}
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
SDValue ShiftLHS = N->getOperand(0);
EVT VT = N->getValueType(0);
// If ShiftLHS is unsigned bit extraction: ((x >> C) & mask), then do not combine
// it with shift 'N' to let it be lowered to UBFX.
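// E.g. in ((x >> 4) & 0xff) << 2, keeping the AND intact lets the inner
// expression select to a single UBFX.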
if (ShiftLHS.getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
isa<ConstantSDNode>(ShiftLHS.getOperand(1))) {
uint64_t TruncMask = ShiftLHS.getConstantOperandVal(1);
if (isMask_64(TruncMask) &&
ShiftLHS.getOperand(0).getOpcode() == ISD::SRL &&
isa<ConstantSDNode>(ShiftLHS.getOperand(0).getOperand(1)))
return false;
}
return true;
}
bool AArch64TargetLowering::isDesirableToCommuteXorWithShift(
const SDNode *N) const {
assert(N->getOpcode() == ISD::XOR &&
(N->getOperand(0).getOpcode() == ISD::SHL ||
N->getOperand(0).getOpcode() == ISD::SRL) &&
"Expected XOR(SHIFT) pattern");
// Only commute if the entire NOT mask is a hidden shifted mask.
auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
if (XorC && ShiftC) {
unsigned MaskIdx, MaskLen;
if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
unsigned ShiftAmt = ShiftC->getZExtValue();
unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
if (N->getOperand(0).getOpcode() == ISD::SHL)
return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
}
}
return false;
}
bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
const SDNode *N, CombineLevel Level) const {
assert(((N->getOpcode() == ISD::SHL &&
N->getOperand(0).getOpcode() == ISD::SRL) ||
(N->getOpcode() == ISD::SRL &&
N->getOperand(0).getOpcode() == ISD::SHL)) &&
"Expected shift-shift mask");
// Don't allow multiuse shift folding with the same shift amount.
if (!N->getOperand(0)->hasOneUse())
return false;
// Only fold srl(shl(x,c1),c2) iff C1 >= C2 to prevent loss of UBFX patterns.
EVT VT = N->getValueType(0);
if (N->getOpcode() == ISD::SRL && (VT == MVT::i32 || VT == MVT::i64)) {
auto *C1 = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
return (!C1 || !C2 || C1->getZExtValue() >= C2->getZExtValue());
}
return true;
}
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
if (BitSize == 0)
return false;
int64_t Val = Imm.getSExtValue();
if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
return true;
if ((int64_t)Val < 0)
Val = ~Val;
if (BitSize == 32)
Val &= (1LL << 32) - 1;
unsigned LZ = countLeadingZeros((uint64_t)Val);
unsigned Shift = (63 - LZ) / 16;
// MOVZ is free, so return true when at most two MOVKs are needed.
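// E.g. 0x123456789ABC has its top set bit below bit 48 (Shift == 2), so
// MOVZ + two MOVKs beat a constant-pool load; wider constants do not.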
return Shift < 3;
}
bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
return (Index == 0 || Index == ResVT.getVectorMinNumElements());
}
/// Turn vector tests of the signbit in the form of:
/// xor (sra X, elt_size(X)-1), -1
/// into:
/// cmge X, X, #0
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!Subtarget->hasNEON() || !VT.isVector())
return SDValue();
// There must be a shift right algebraic before the xor, and the xor must be a
// 'not' operation.
SDValue Shift = N->getOperand(0);
SDValue Ones = N->getOperand(1);
if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
!ISD::isBuildVectorAllOnes(Ones.getNode()))
return SDValue();
// The shift should be smearing the sign bit across each vector element.
auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
return SDValue();
return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
}
// Given a vecreduce_add node, detect the below pattern and convert it to the
// node sequence with UABDL, [S|U]ABD and UADDLP.
//
// i32 vecreduce_add(
// v16i32 abs(
// v16i32 sub(
// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
// =================>
// i32 vecreduce_add(
// v4i32 UADDLP(
// v8i16 add(
// v8i16 zext(
// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
// v8i16 zext(
// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
SelectionDAG &DAG) {
// Assumed i32 vecreduce_add
if (N->getValueType(0) != MVT::i32)
return SDValue();
SDValue VecReduceOp0 = N->getOperand(0);
unsigned Opcode = VecReduceOp0.getOpcode();
// Assumed v16i32 abs
if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue ABS = VecReduceOp0;
// Assumed v16i32 sub
if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
return SDValue();
SDValue SUB = ABS->getOperand(0);
unsigned Opcode0 = SUB->getOperand(0).getOpcode();
unsigned Opcode1 = SUB->getOperand(1).getOpcode();
// Assumed v16i32 type
if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
return SDValue();
// Assumed zext or sext
bool IsZExt = false;
if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
IsZExt = true;
} else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
IsZExt = false;
} else
return SDValue();
SDValue EXT0 = SUB->getOperand(0);
SDValue EXT1 = SUB->getOperand(1);
// Assumed zext's operand has v16i8 type
if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
return SDValue();
// Pattern is detected. Let's convert it to a sequence of nodes.
SDLoc DL(N);
// First, create the node pattern of UABD/SABD.
SDValue UABDHigh8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(8, DL, MVT::i64));
SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDHigh8Op0, UABDHigh8Op1);
SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
// Second, create the node pattern of UABAL.
SDValue UABDLo8Op0 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8Op1 =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
DAG.getConstant(0, DL, MVT::i64));
SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
UABDLo8Op0, UABDLo8Op1);
SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
// Third, create the node of UADDLP.
SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
// Fourth, create the node of VECREDUCE_ADD.
return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
}
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
if (!ST->hasDotProd())
return performVecReduceAddCombineWithUADDLP(N, DAG);
SDValue Op0 = N->getOperand(0);
if (N->getValueType(0) != MVT::i32 ||
Op0.getValueType().getVectorElementType() != MVT::i32)
return SDValue();
unsigned ExtOpcode = Op0.getOpcode();
SDValue A = Op0;
SDValue B;
if (ExtOpcode == ISD::MUL) {
A = Op0.getOperand(0);
B = Op0.getOperand(1);
if (A.getOpcode() != B.getOpcode() ||
A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
return SDValue();
ExtOpcode = A.getOpcode();
}
if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
return SDValue();
EVT Op0VT = A.getOperand(0).getValueType();
if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
return SDValue();
SDLoc DL(Op0);
// For non-mla reductions B can be set to 1. For MLA we take the operand of
// the extend B.
if (!B)
B = DAG.getConstant(1, DL, Op0VT);
else
B = B.getOperand(0);
SDValue Zeros =
DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
A.getOperand(0), B);
return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
}
// Given an (integer) vecreduce, we know the order of the inputs does not
// matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
// into UADDV(UADDLP(x)). This can also happen through an extra add, where we
// transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
auto DetectAddExtract = [&](SDValue A) {
// Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
// UADDLP(x) if found.
if (A.getOpcode() != ISD::ADD)
return SDValue();
EVT VT = A.getValueType();
SDValue Op0 = A.getOperand(0);
SDValue Op1 = A.getOperand(1);
if (Op0.getOpcode() != Op1.getOpcode() ||
(Op0.getOpcode() != ISD::ZERO_EXTEND &&
Op0.getOpcode() != ISD::SIGN_EXTEND))
return SDValue();
SDValue Ext0 = Op0.getOperand(0);
SDValue Ext1 = Op1.getOperand(0);
if (Ext0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Ext1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Ext0.getOperand(0) != Ext1.getOperand(0))
return SDValue();
// Check that the source type is twice as wide as the add type, and that the
// extracts are from the upper/lower halves of the same source.
if (Ext0.getOperand(0).getValueType().getVectorNumElements() !=
VT.getVectorNumElements() * 2)
return SDValue();
if ((Ext0.getConstantOperandVal(1) != 0 &&
Ext1.getConstantOperandVal(1) != VT.getVectorNumElements()) &&
(Ext1.getConstantOperandVal(1) != 0 &&
Ext0.getConstantOperandVal(1) != VT.getVectorNumElements()))
return SDValue();
unsigned Opcode = Op0.getOpcode() == ISD::ZERO_EXTEND ? AArch64ISD::UADDLP
: AArch64ISD::SADDLP;
return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
};
SDValue A = N->getOperand(0);
if (SDValue R = DetectAddExtract(A))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
if (A.getOpcode() == ISD::ADD) {
if (SDValue R = DetectAddExtract(A.getOperand(0)))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
A.getOperand(1)));
if (SDValue R = DetectAddExtract(A.getOperand(1)))
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
A.getOperand(0)));
}
return SDValue();
}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
}
SDValue
AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
EVT VT = N->getValueType(0);
// For scalable vectors, or when SVE is used for fixed-length vectors, mark
// the operation as cheap so we can handle it much later. This allows us to
// handle larger-than-legal types.
if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
return SDValue(N, 0);
// fold (sdiv X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
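// A sketch of the DAG built below for a positive divisor, e.g. (sdiv x, 8):
//   Cmp  = flags of (x < 0)
//   Add  = (add x, 7)
//   CSel = (csel Add, x, lt, Cmp)   ; bias only negative dividends
//   Res  = (sra CSel, 3)
// For a negated power of two the result is additionally subtracted from 0.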
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
unsigned Lg2 = Divisor.countTrailingZeros();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
// Add (N0 < 0) ? Pow2 - 1 : 0;
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(Add.getNode());
Created.push_back(CSel.getNode());
// Divide by pow2.
SDValue SRA =
DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
if (Divisor.isNonNegative())
return SRA;
Created.push_back(SRA.getNode());
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
}
SDValue
AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N, 0); // Lower SREM as SREM
EVT VT = N->getValueType(0);
// For scalable vectors, or when SVE is used for fixed-length vectors, mark
// the operation as cheap so we can handle it much later. This allows us to
// handle larger-than-legal types.
if (VT.isScalableVector() || Subtarget->useSVEForFixedLengthVectors())
return SDValue(N, 0);
// fold (srem X, pow2)
if ((VT != MVT::i32 && VT != MVT::i64) ||
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
unsigned Lg2 = Divisor.countTrailingZeros();
if (Lg2 == 0)
return SDValue();
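// A sketch of the DAG built below, e.g. for (srem x, 4) (Lg2 == 2):
//   Negs   = (subs 0, x)              ; negation that also sets flags
//   AndPos = (and x, 3)
//   AndNeg = (and Negs, 3)
//   Res    = (csneg AndPos, AndNeg, mi)
// The Lg2 == 1 case instead uses a single AND plus a CSNEG on (x >= 0).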
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue CCVal, CSNeg;
if (Lg2 == 1) {
SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETGE, CCVal, DAG, DL);
SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, And, And, CCVal, Cmp);
Created.push_back(Cmp.getNode());
Created.push_back(And.getNode());
} else {
SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0);
SDValue AndPos = DAG.getNode(ISD::AND, DL, VT, N0, Pow2MinusOne);
SDValue AndNeg = DAG.getNode(ISD::AND, DL, VT, Negs, Pow2MinusOne);
CSNeg = DAG.getNode(AArch64ISD::CSNEG, DL, VT, AndPos, AndNeg, CCVal,
Negs.getValue(1));
Created.push_back(Negs.getNode());
Created.push_back(AndPos.getNode());
Created.push_back(AndNeg.getNode());
}
return CSNeg;
}
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
case Intrinsic::aarch64_sve_cnth:
case Intrinsic::aarch64_sve_cntw:
case Intrinsic::aarch64_sve_cntd:
return true;
}
return false;
}
/// Calculates what the pre-extend type is, based on the extension
/// operation node provided by \p Extend.
///
/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
/// pre-extend type is pulled directly from the operand, while other extend
/// operations need a bit more inspection to get this information.
///
/// \param Extend The SDNode from the DAG that represents the extend operation
///
/// \returns The type representing the \p Extend source type, or \p MVT::Other
/// if no valid type can be determined
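///
/// For example, (and x, 0xffff) implies an i16 pre-extend type, while
/// (sign_extend_inreg x, vt) reports vt directly.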
static EVT calculatePreExtendType(SDValue Extend) {
switch (Extend.getOpcode()) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return Extend.getOperand(0).getValueType();
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::SIGN_EXTEND_INREG: {
VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
if (!TypeNode)
return MVT::Other;
return TypeNode->getVT();
}
case ISD::AND: {
ConstantSDNode *Constant =
dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
if (!Constant)
return MVT::Other;
uint32_t Mask = Constant->getZExtValue();
if (Mask == UCHAR_MAX)
return MVT::i8;
else if (Mask == USHRT_MAX)
return MVT::i16;
else if (Mask == UINT_MAX)
return MVT::i32;
return MVT::Other;
}
default:
return MVT::Other;
}
}
/// Combines a buildvector(sext/zext) or shuffle(sext/zext, undef) node pattern
/// into sext/zext(buildvector) or sext/zext(shuffle) making use of the vector
/// SExt/ZExt rather than the scalar SExt/ZExt
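///
/// For example (a sketch in DAG terms):
///   (v8i16 build_vector (zext i8 a), (zext i8 b), ...)
/// becomes
///   (v8i16 zero_extend (v8i8 build_vector a, b, ...))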
static SDValue performBuildShuffleExtendCombine(SDValue BV, SelectionDAG &DAG) {
EVT VT = BV.getValueType();
if (BV.getOpcode() != ISD::BUILD_VECTOR &&
BV.getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
// Use the first item in the buildvector/shuffle to get the size of the
// extend, and make sure it looks valid.
SDValue Extend = BV->getOperand(0);
unsigned ExtendOpcode = Extend.getOpcode();
bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
ExtendOpcode == ISD::AssertSext;
if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
return SDValue();
// Shuffle inputs are vectors; limit to SIGN_EXTEND and ZERO_EXTEND to ensure
// calculatePreExtendType will work without issue.
if (BV.getOpcode() == ISD::VECTOR_SHUFFLE &&
ExtendOpcode != ISD::SIGN_EXTEND && ExtendOpcode != ISD::ZERO_EXTEND)
return SDValue();
// Restrict valid pre-extend data type
EVT PreExtendType = calculatePreExtendType(Extend);
if (PreExtendType == MVT::Other ||
PreExtendType.getScalarSizeInBits() != VT.getScalarSizeInBits() / 2)
return SDValue();
// Make sure all other operands are equally extended
for (SDValue Op : drop_begin(BV->ops())) {
if (Op.isUndef())
continue;
unsigned Opc = Op.getOpcode();
bool OpcIsSExt = Opc == ISD::SIGN_EXTEND || Opc == ISD::SIGN_EXTEND_INREG ||
Opc == ISD::AssertSext;
if (OpcIsSExt != IsSExt || calculatePreExtendType(Op) != PreExtendType)
return SDValue();
}
SDValue NBV;
SDLoc DL(BV);
if (BV.getOpcode() == ISD::BUILD_VECTOR) {
EVT PreExtendVT = VT.changeVectorElementType(PreExtendType);
EVT PreExtendLegalType =
PreExtendType.getScalarSizeInBits() < 32 ? MVT::i32 : PreExtendType;
SmallVector<SDValue, 8> NewOps;
for (SDValue Op : BV->ops())
NewOps.push_back(Op.isUndef() ? DAG.getUNDEF(PreExtendLegalType)
: DAG.getAnyExtOrTrunc(Op.getOperand(0), DL,
PreExtendLegalType));
NBV = DAG.getNode(ISD::BUILD_VECTOR, DL, PreExtendVT, NewOps);
} else { // BV.getOpcode() == ISD::VECTOR_SHUFFLE
EVT PreExtendVT = VT.changeVectorElementType(PreExtendType.getScalarType());
NBV = DAG.getVectorShuffle(PreExtendVT, DL, BV.getOperand(0).getOperand(0),
BV.getOperand(1).isUndef()
? DAG.getUNDEF(PreExtendVT)
: BV.getOperand(1).getOperand(0),
cast<ShuffleVectorSDNode>(BV)->getMask());
}
return DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, NBV);
}
/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
// If the value type isn't a vector, none of the operands are going to be dups
EVT VT = Mul->getValueType(0);
if (VT != MVT::v8i16 && VT != MVT::v4i32 && VT != MVT::v2i64)
return SDValue();
SDValue Op0 = performBuildShuffleExtendCombine(Mul->getOperand(0), DAG);
SDValue Op1 = performBuildShuffleExtendCombine(Mul->getOperand(1), DAG);
// If neither operand has been changed, don't make any further changes
if (!Op0 && !Op1)
return SDValue();
SDLoc DL(Mul);
return DAG.getNode(Mul->getOpcode(), DL, VT, Op0 ? Op0 : Mul->getOperand(0),
Op1 ? Op1 : Mul->getOperand(1));
}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
return Ext;
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y,
// and in MachineCombiner pass, add+mul will be combined into madd.
// Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue MulOper;
unsigned AddSubOpc;
auto IsAddSubWith1 = [&](SDValue V) -> bool {
AddSubOpc = V->getOpcode();
if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
SDValue Opnd = V->getOperand(1);
MulOper = V->getOperand(0);
if (AddSubOpc == ISD::SUB)
std::swap(Opnd, MulOper);
if (auto C = dyn_cast<ConstantSDNode>(Opnd))
return C->isOne();
}
return false;
};
if (IsAddSubWith1(N0)) {
SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
}
if (IsAddSubWith1(N1)) {
SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
}
// The below optimizations require a constant RHS.
if (!isa<ConstantSDNode>(N1))
return SDValue();
ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
// the scaling from being obscured here. This makes it easier to pattern match.
if (IsSVECntIntrinsic(N0) ||
(N0->getOpcode() == ISD::TRUNCATE &&
(IsSVECntIntrinsic(N0->getOperand(0)))))
if (ConstValue.sge(1) && ConstValue.sle(16))
return SDValue();
// Multiplication by a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
// More aggressively, some multiplications N0 * C can be lowered to
// shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
// e.g. 6=3*2=(2+1)*2.
// TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
// which equals (1+2)*16-(1+2).
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
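// For example, C = 6 gives TrailingZeroes = 1 and ShiftedConstValue = 3,
// so (mul x, 6) becomes (shl (add (shl x, 1), x), 1).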
unsigned TrailingZeroes = ConstValue.countTrailingZeros();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
isZeroExtended(N0.getNode(), DAG)))
return SDValue();
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into madd or msub.
if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
N->use_begin()->getOpcode() == ISD::SUB))
return SDValue();
}
// Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
unsigned ShiftAmt;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
bool NegateResult = false;
if (ConstValue.isNonNegative()) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
// (mul x, 2^N - 1) => (sub (shl x, N), x)
// (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
APInt SCVMinus1 = ShiftedConstValue - 1;
APInt CVPlus1 = ConstValue + 1;
if (SCVMinus1.isPowerOf2()) {
ShiftAmt = SCVMinus1.logBase2();
AddSubOpc = ISD::ADD;
} else if (CVPlus1.isPowerOf2()) {
ShiftAmt = CVPlus1.logBase2();
AddSubOpc = ISD::SUB;
} else
return SDValue();
} else {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
APInt CVNegPlus1 = -ConstValue + 1;
APInt CVNegMinus1 = -ConstValue - 1;
if (CVNegPlus1.isPowerOf2()) {
ShiftAmt = CVNegPlus1.logBase2();
AddSubOpc = ISD::SUB;
ShiftValUseIsN0 = false;
} else if (CVNegMinus1.isPowerOf2()) {
ShiftAmt = CVNegMinus1.logBase2();
AddSubOpc = ISD::ADD;
NegateResult = true;
} else
return SDValue();
}
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
assert(!(NegateResult && TrailingZeroes) &&
"NegateResult and TrailingZeroes cannot both be true for now.");
// Negate the result.
if (NegateResult)
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
// Shift the result.
if (TrailingZeroes)
return DAG.getNode(ISD::SHL, DL, VT, Res,
DAG.getConstant(TrailingZeroes, DL, MVT::i64));
return Res;
}
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
SelectionDAG &DAG) {
// Take advantage of vector comparisons producing 0 or -1 in each lane to
// optimize away operation when it's from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
// AND(VECTOR_CMP(x,y), constant2)
// constant2 = UNARYOP(constant)
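// For example, with UNARYOP == sint_to_fp and constant == <1, 1, 1, 1>,
// each lane of the AND is 0 or 1, so we can instead AND the mask with the
// bitcast of <1.0, 1.0, 1.0, 1.0> (UNARYOP maps the zero lanes to 0.0).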
// Early exit if this isn't a vector operation, the operand of the
// unary operation isn't a bitwise AND, or if the sizes of the operations
// aren't the same.
EVT VT = N->getValueType(0);
if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
return SDValue();
// Now check that the other operand of the AND is a constant. We could
// make the transformation for non-constant splats as well, but it's unclear
// that would be a benefit as it would not eliminate any operations, just
// perform one more step in scalar code before moving to the vector unit.
if (BuildVectorSDNode *BV =
dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
// Bail out if the vector isn't a constant.
if (!BV->isConstant())
return SDValue();
// Everything checks out. Build up the new and improved node.
SDLoc DL(N);
EVT IntVT = BV->getValueType(0);
// Create a new constant of the appropriate type for the transformed
// DAG.
SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
// The AND node needs bitcasts to/from an integer vector type around it.
SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
N->getOperand(0)->getOperand(0), MaskConst);
SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
return Res;
}
return SDValue();
}
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
// First try to optimize away the conversion when it's conditionally from
// a constant. Vectors only.
if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
return Res;
EVT VT = N->getValueType(0);
if (VT != MVT::f32 && VT != MVT::f64)
return SDValue();
// Only optimize when the source and destination types have the same width.
if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// If the result of an integer load is only used by an integer-to-float
// conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
// This eliminates an "integer-to-vector-move" UOP and improves throughput.
SDValue N0 = N->getOperand(0);
if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
// Do not change the width of a volatile load.
!cast<LoadSDNode>(N0)->isVolatile()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
LN0->getPointerInfo(), LN0->getAlign(),
LN0->getMemOperand()->getFlags());
// Make sure successors of the original load stay after it by updating them
// to use the new Chain.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
unsigned Opcode =
(N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
return DAG.getNode(Opcode, SDLoc(N), VT, Load);
}
return SDValue();
}
/// Fold a floating-point multiply by power of two into floating-point to
/// fixed-point conversion.
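///
/// E.g. (v4i32 fp_to_sint (fmul x, splat(8.0))) becomes the fixed-point
/// conversion intrinsic vcvtfp2fxs(x, #3), i.e. an FCVTZS with 3 fractional
/// bits.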
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
if (!N->getValueType(0).isSimple())
return SDValue();
SDValue Op = N->getOperand(0);
if (!Op.getValueType().isSimple() || Op.getOpcode() != ISD::FMUL)
return SDValue();
if (!Op.getValueType().is64BitVector() && !Op.getValueType().is128BitVector())
return SDValue();
SDValue ConstVec = Op->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64 &&
(FloatBits != 16 || !Subtarget->hasFullFP16()))
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
uint32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., float -> i64).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t Bits = IntBits == 64 ? 64 : 32;
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
if (C == -1 || C == 0 || C > Bits)
return SDValue();
EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
return SDValue();
if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
N->getOpcode() == ISD::FP_TO_UINT_SAT) {
EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (SatVT.getScalarSizeInBits() != IntBits || IntBits != FloatBits)
return SDValue();
}
SDLoc DL(N);
bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
// We can handle smaller integers by generating an extra trunc.
if (IntBits < FloatBits)
FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
return FixConv;
}
/// Fold a floating-point divide by power of two into fixed-point to
/// floating-point conversion.
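///
/// E.g. (v4f32 fdiv (sint_to_fp x), splat(8.0)) becomes the fixed-point
/// conversion intrinsic vcvtfxs2fp(x, #3), i.e. an SCVTF with 3 fractional
/// bits.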
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned Opc = Op->getOpcode();
if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
!Op.getOperand(0).getValueType().isSimple() ||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
return SDValue();
SDValue ConstVec = N->getOperand(1);
if (!isa<BuildVectorSDNode>(ConstVec))
return SDValue();
MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
int32_t IntBits = IntTy.getSizeInBits();
if (IntBits != 16 && IntBits != 32 && IntBits != 64)
return SDValue();
MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
int32_t FloatBits = FloatTy.getSizeInBits();
if (FloatBits != 32 && FloatBits != 64)
return SDValue();
// Avoid conversions where iN is larger than the float (e.g., i64 -> float).
if (IntBits > FloatBits)
return SDValue();
BitVector UndefElements;
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
if (C == -1 || C == 0 || C > FloatBits)
return SDValue();
MVT ResTy;
unsigned NumLanes = Op.getValueType().getVectorNumElements();
switch (NumLanes) {
default:
return SDValue();
case 2:
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
return SDValue();
SDLoc DL(N);
SDValue ConvInput = Op.getOperand(0);
bool IsSigned = Opc == ISD::SINT_TO_FP;
if (IntBits < FloatBits)
ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
ResTy, ConvInput);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
: Intrinsic::aarch64_neon_vcvtfxu2fp;
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
DAG.getConstant(C, DL, MVT::i32));
}
/// An EXTR instruction is made up of two shifts, ORed together. This helper
/// searches for and classifies those shifts.
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
bool &FromHi) {
if (N.getOpcode() == ISD::SHL)
FromHi = false;
else if (N.getOpcode() == ISD::SRL)
FromHi = true;
else
return false;
if (!isa<ConstantSDNode>(N.getOperand(1)))
return false;
ShiftAmount = N->getConstantOperandVal(1);
Src = N->getOperand(0);
return true;
}
/// EXTR instruction extracts a contiguous chunk of bits from two existing
/// registers viewed as a high/low pair. This function looks for the pattern:
/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
/// with an EXTR. Can't quite be done in TableGen because the two immediates
/// aren't independent.
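///
/// For instance, on i32 (or (shl a, 8), (srl b, 24)) is combined into
/// (EXTR a, b, 24).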
static SDValue tryCombineToEXTR(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
EVT VT = N->getValueType(0);
assert(N->getOpcode() == ISD::OR && "Unexpected root");
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
SDValue LHS;
uint32_t ShiftLHS = 0;
bool LHSFromHi = false;
if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
return SDValue();
SDValue RHS;
uint32_t ShiftRHS = 0;
bool RHSFromHi = false;
if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
return SDValue();
// If they're both trying to come from the high part of the register, they're
// not really an EXTR.
if (LHSFromHi == RHSFromHi)
return SDValue();
if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
return SDValue();
if (LHSFromHi) {
std::swap(LHS, RHS);
std::swap(ShiftLHS, ShiftRHS);
}
return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
DAG.getConstant(ShiftRHS, DL, MVT::i64));
}
static SDValue tryCombineToBSL(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
if (!VT.isVector())
return SDValue();
// The combining code currently only works for NEON vectors. In particular,
// it does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() != ISD::AND)
return SDValue();
// InstCombine does (not (neg a)) => (add a -1).
// Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
// Loop over all combinations of AND operands.
for (int i = 1; i >= 0; --i) {
for (int j = 1; j >= 0; --j) {
SDValue O0 = N0->getOperand(i);
SDValue O1 = N1->getOperand(j);
SDValue Sub, Add, SubSibling, AddSibling;
// Find a SUB and an ADD operand, one from each AND.
if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
Sub = O0;
Add = O1;
SubSibling = N0->getOperand(1 - i);
AddSibling = N1->getOperand(1 - j);
} else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
Add = O0;
Sub = O1;
AddSibling = N0->getOperand(1 - i);
SubSibling = N1->getOperand(1 - j);
} else
continue;
if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
continue;
// The all-ones constant is always the right-hand operand of the Add.
if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
continue;
if (Sub.getOperand(1) != Add.getOperand(0))
continue;
return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
}
}
// (or (and a b) (and (not a) c)) => (bsl a b c)
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
unsigned Bits = VT.getScalarSizeInBits();
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
if (!BVN0 || !BVN1)
continue;
bool FoundMatch = true;
for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
if (!CN0 || !CN1 ||
CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
FoundMatch = false;
break;
}
}
if (FoundMatch)
return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
N0->getOperand(1 - i), N1->getOperand(1 - j));
}
return SDValue();
}
// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
// convert to csel(ccmp(.., cc0)), depending on cc1:
// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
// =>
// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
//
// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
// =>
// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue CSel0 = N->getOperand(0);
SDValue CSel1 = N->getOperand(1);
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
CSel1.getOpcode() != AArch64ISD::CSEL)
return SDValue();
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
return SDValue();
if (!isNullConstant(CSel0.getOperand(0)) ||
!isOneConstant(CSel0.getOperand(1)) ||
!isNullConstant(CSel1.getOperand(0)) ||
!isOneConstant(CSel1.getOperand(1)))
return SDValue();
SDValue Cmp0 = CSel0.getOperand(3);
SDValue Cmp1 = CSel1.getOperand(3);
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
return SDValue();
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
Cmp0.getOpcode() == AArch64ISD::SUBS) {
std::swap(Cmp0, Cmp1);
std::swap(CC0, CC1);
}
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
return SDValue();
SDLoc DL(N);
SDValue CCmp;
if (N->getOpcode() == ISD::AND) {
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
} else {
SDLoc DL(N);
AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
}
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
CCmp);
}
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
if (SDValue Res = tryCombineToEXTR(N, DCI))
return Res;
if (SDValue Res = tryCombineToBSL(N, DCI))
return Res;
return SDValue();
}
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
if (!MemVT.getVectorElementType().isSimple())
return false;
uint64_t MaskForTy = 0ull;
switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
case MVT::i8:
MaskForTy = 0xffull;
break;
case MVT::i16:
MaskForTy = 0xffffull;
break;
case MVT::i32:
MaskForTy = 0xffffffffull;
break;
default:
return false;
break;
}
if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
return false;
}
static SDValue performSVEAndCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Zero/any extend of an unsigned unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
SDValue UnpkOp = Src->getOperand(0);
SDValue Dup = N->getOperand(1);
if (Dup.getOpcode() != ISD::SPLAT_VECTOR)
return SDValue();
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
if (!C)
return SDValue();
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
// a new AND onto the operand
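// E.g. (and (uunpklo (nxv16i8 x)), splat(0xff)) already has its lanes
// zero-extended from i8, so the AND is a no-op and we return the unpack.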
EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
(ExtVal == 0xFFFF && EltTy == MVT::i16) ||
(ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
return Src;
// Truncate to prevent a DUP with an over-wide constant
APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
// Otherwise, make sure we propagate the AND to the operand
// of the unpack
Dup = DAG.getNode(ISD::SPLAT_VECTOR, DL, UnpkOp->getValueType(0),
DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
SDValue And = DAG.getNode(ISD::AND, DL,
UnpkOp->getValueType(0), UnpkOp, Dup);
return DAG.getNode(Opc, DL, N->getValueType(0), And);
}
if (!EnableCombineMGatherIntrinsics)
return SDValue();
SDValue Mask = N->getOperand(1);
if (!Src.hasOneUse())
return SDValue();
EVT MemVT;
// SVE load instructions perform an implicit zero-extend, which makes them
// perfect candidates for combining.
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
case AArch64ISD::LDNF1_MERGE_ZERO:
case AArch64ISD::LDFF1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
break;
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLDFF1_MERGE_ZERO:
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
case AArch64ISD::GLDNT1_MERGE_ZERO:
MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
break;
default:
return SDValue();
}
if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
return Src;
return SDValue();
}
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
if (SDValue R = performANDORCSELCombine(N, DAG))
return R;
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
if (VT.isScalableVector())
return performSVEAndCombine(N, DCI);
// The combining code below works only for NEON vectors. In particular, it
// does not work for SVE when dealing with vectors wider than 128 bits.
if (!VT.is64BitVector() && !VT.is128BitVector())
return SDValue();
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
if (!BVN)
return SDValue();
// AND does not accept an immediate, so check if we can use a BIC immediate
// instruction instead. We do this here instead of using a (and x, (mvni imm))
// pattern in isel, because some immediates may be lowered to the preferred
// (and x, (movi imm)) form, even though an mvni representation also exists.
APInt DefBits(VT.getSizeInBits(), 0);
APInt UndefBits(VT.getSizeInBits(), 0);
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
DefBits = ~DefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)))
return NewOp;
UndefBits = ~UndefBits;
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)))
return NewOp;
}
return SDValue();
}
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
switch (Opcode) {
case ISD::STRICT_FADD:
case ISD::FADD:
return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
case ISD::ADD:
return VT == MVT::i64;
default:
return false;
}
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond);
static bool isPredicateCCSettingOp(SDValue N) {
if ((N.getOpcode() == ISD::SETCC) ||
(N.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
(N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilege ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilegt ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehi ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilehs ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilele ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelo ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilels ||
N.getConstantOperandVal(0) == Intrinsic::aarch64_sve_whilelt ||
// get_active_lane_mask is lowered to a whilelo instruction.
N.getConstantOperandVal(0) == Intrinsic::get_active_lane_mask)))
return true;
return false;
}
// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
// ... into: "ptrue p, all" + PTEST
static SDValue
performFirstTrueTestVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
// Make sure PTEST sees only legal types (run after legalisation).
if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
return SDValue();
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1 ||
!isNullConstant(N->getOperand(1)))
return SDValue();
// Restrict the DAG combine to cases where we're extracting from a
// flag-setting operation.
if (!isPredicateCCSettingOp(N0))
return SDValue();
// Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
SelectionDAG &DAG = DCI.DAG;
SDValue Pg = getPTrue(DAG, SDLoc(N), VT, AArch64SVEPredPattern::all);
return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::FIRST_ACTIVE);
}
// Materialize : Idx = (add (mul vscale, NumEls), -1)
// i1 = extract_vector_elt t37, Constant:i64<Idx>
// ... into: "ptrue p, all" + PTEST
static SDValue
performLastTrueTestVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
// Make sure PTEST sees only legal types (run after legalisation).
if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
return SDValue();
SDValue N0 = N->getOperand(0);
EVT OpVT = N0.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
// Idx == (add (mul vscale, NumEls), -1)
SDValue Idx = N->getOperand(1);
if (Idx.getOpcode() != ISD::ADD || !isAllOnesConstant(Idx.getOperand(1)))
return SDValue();
SDValue VS = Idx.getOperand(0);
if (VS.getOpcode() != ISD::VSCALE)
return SDValue();
unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue();
if (VS.getConstantOperandVal(0) != NumEls)
return SDValue();
// Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
SelectionDAG &DAG = DCI.DAG;
SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all);
return getPTest(DAG, N->getValueType(0), Pg, N0, AArch64CC::LAST_ACTIVE);
}
static SDValue
performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
return Res;
if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
return Res;
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
bool IsStrict = N0->isStrictFPOpcode();
// extract(dup x) -> x
if (N0.getOpcode() == AArch64ISD::DUP)
return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
// (fadd (vXf32 Other)
// (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
// ->
// (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
// (extract_vector_elt (vXf32 Other) 1))
// For strict_fadd we need to make sure the old strict_fadd can be deleted, so
// we can only do this when it's used only by the extract_vector_elt.
if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
(!IsStrict || N0.hasOneUse())) {
SDLoc DL(N0);
SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
SDValue N01 = N0->getOperand(IsStrict ? 2 : 1);
ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
SDValue Other = N00;
// And handle the commutative case.
if (!Shuffle) {
Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
Other = N01;
}
if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
Other == Shuffle->getOperand(0)) {
SDValue Extract1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(0, DL, MVT::i64));
SDValue Extract2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
DAG.getConstant(1, DL, MVT::i64));
if (!IsStrict)
return DAG.getNode(N0->getOpcode(), DL, VT, Extract1, Extract2);
// For strict_fadd we need uses of the final extract_vector to be replaced
// with the strict_fadd, but we also need uses of the chain output of the
// original strict_fadd to use the chain output of the new strict_fadd as
// otherwise it may not be deleted.
SDValue Ret = DAG.getNode(N0->getOpcode(), DL,
{VT, MVT::Other},
{N0->getOperand(0), Extract1, Extract2});
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Ret);
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Ret.getValue(1));
return SDValue(N, 0);
}
}
return SDValue();
}
static SDValue performConcatVectorsCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
if (VT.isScalableVector())
return SDValue();
// Optimize concat_vectors of truncated vectors, where the intermediate
// type is illegal, to avoid said illegality, e.g.,
// (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
// (v2i16 (truncate (v2i64)))))
// ->
// (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
// (v4i32 (bitcast (v2i64))),
// <0, 2, 4, 6>)))
// This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
// on both input and result type, so we might generate worse code.
// On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
N1Opc == ISD::TRUNCATE) {
SDValue N00 = N0->getOperand(0);
SDValue N10 = N1->getOperand(0);
EVT N00VT = N00.getValueType();
if (N00VT == N10.getValueType() &&
(N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
for (size_t i = 0; i < Mask.size(); ++i)
Mask[i] = i * 2;
return DAG.getNode(ISD::TRUNCATE, dl, VT,
DAG.getVectorShuffle(
MidVT, dl,
DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
}
}
if (N->getOperand(0).getValueType() == MVT::v4i8) {
// If we have a concat of v4i8 loads, convert them to a buildvector of f32
// loads to prevent having to go through the v4i8 load legalization that
// needs to extend each element into a larger type.
if (N->getNumOperands() % 2 == 0 && all_of(N->op_values(), [](SDValue V) {
if (V.getValueType() != MVT::v4i8)
return false;
if (V.isUndef())
return true;
LoadSDNode *LD = dyn_cast<LoadSDNode>(V);
return LD && V.hasOneUse() && LD->isSimple() && !LD->isIndexed() &&
LD->getExtensionType() == ISD::NON_EXTLOAD;
})) {
EVT NVT =
EVT::getVectorVT(*DAG.getContext(), MVT::f32, N->getNumOperands());
SmallVector<SDValue> Ops;
for (unsigned i = 0; i < N->getNumOperands(); i++) {
SDValue V = N->getOperand(i);
if (V.isUndef())
Ops.push_back(DAG.getUNDEF(MVT::f32));
else {
LoadSDNode *LD = cast<LoadSDNode>(V);
SDValue NewLoad =
DAG.getLoad(MVT::f32, dl, LD->getChain(), LD->getBasePtr(),
LD->getMemOperand());
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
Ops.push_back(NewLoad);
}
}
return DAG.getBitcast(N->getValueType(0),
DAG.getBuildVector(NVT, dl, Ops));
}
}
// Wait 'til after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Optimise concat_vectors of two [us]avgceils or [us]avgfloors that use
// extracted subvectors from the same original vectors. Combine these into a
// single avg that operates on the two original vectors.
// avgceil is the target-independent name for rhadd; avgfloor is a hadd.
// Example:
// (concat_vectors (v8i8 (avgceils (extract_subvector (v16i8 OpA, <0>),
// extract_subvector (v16i8 OpB, <0>))),
// (v8i8 (avgceils (extract_subvector (v16i8 OpA, <8>),
// extract_subvector (v16i8 OpB, <8>)))))
// ->
// (v16i8(avgceils(v16i8 OpA, v16i8 OpB)))
if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
(N0Opc == ISD::AVGCEILU || N0Opc == ISD::AVGCEILS ||
N0Opc == ISD::AVGFLOORU || N0Opc == ISD::AVGFLOORS)) {
SDValue N00 = N0->getOperand(0);
SDValue N01 = N0->getOperand(1);
SDValue N10 = N1->getOperand(0);
SDValue N11 = N1->getOperand(1);
EVT N00VT = N00.getValueType();
EVT N10VT = N10.getValueType();
if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
SDValue N00Source = N00->getOperand(0);
SDValue N01Source = N01->getOperand(0);
SDValue N10Source = N10->getOperand(0);
SDValue N11Source = N11->getOperand(0);
if (N00Source == N10Source && N01Source == N11Source &&
N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
assert(N0.getValueType() == N1.getValueType());
uint64_t N00Index = N00.getConstantOperandVal(1);
uint64_t N01Index = N01.getConstantOperandVal(1);
uint64_t N10Index = N10.getConstantOperandVal(1);
uint64_t N11Index = N11.getConstantOperandVal(1);
if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
N10Index == N00VT.getVectorNumElements())
return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
}
}
}
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
}
// Canonicalise concat_vectors so that the right-hand vector has as few
// bit-casts as possible before its real operation. The primary matching
// destination for these operations will be the narrowing "2" instructions,
// which depend on the operation being performed on this right-hand vector.
// For example,
// (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
// If the RHS is not a vector, this is not the pattern we're looking for.
if (!RHSTy.isVector())
return SDValue();
LLVM_DEBUG(
dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
RHSTy.getVectorNumElements() * 2);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
RHS));
}
static SDValue
performExtractSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
EVT VT = N->getValueType(0);
if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue V = N->getOperand(0);
// NOTE: This combine exists in DAGCombiner, but that version's legality check
// blocks this combine because the non-const case requires custom lowering.
//
// ty1 extract_vector(ty2 splat(const)) -> ty1 splat(const)
if (V.getOpcode() == ISD::SPLAT_VECTOR)
if (isa<ConstantSDNode>(V.getOperand(0)))
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));
return SDValue();
}
static SDValue
performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Vec = N->getOperand(0);
SDValue SubVec = N->getOperand(1);
uint64_t IdxVal = N->getConstantOperandVal(2);
EVT VecVT = Vec.getValueType();
EVT SubVT = SubVec.getValueType();
// Only do this for legal fixed vector types.
if (!VecVT.isFixedLengthVector() ||
!DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
return SDValue();
// Ignore widening patterns.
if (IdxVal == 0 && Vec.isUndef())
return SDValue();
// Subvector must be half the width and an "aligned" insertion.
unsigned NumSubElts = SubVT.getVectorNumElements();
if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
(IdxVal != 0 && IdxVal != NumSubElts))
return SDValue();
// Fold insert_subvector -> concat_vectors
// insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
// insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
SDValue Lo, Hi;
if (IdxVal == 0) {
Lo = SubVec;
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
DAG.getVectorIdxConstant(NumSubElts, DL));
} else {
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
DAG.getVectorIdxConstant(0, DL));
Hi = SubVec;
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
}
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Wait until after everything is legalized to try this. That way we have
// legal vector types and such.
if (DCI.isBeforeLegalizeOps())
return SDValue();
// Transform a scalar conversion of a value from a lane extract into a
// lane extract of a vector conversion. E.g., from foo1 to foo2:
// double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
// double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
//
// The second form interacts better with instruction selection and the
// register allocator to avoid cross-class register copies that aren't
// coalescable due to a lane reference.
// Check the operand and see if it originates from a lane extract.
SDValue Op1 = N->getOperand(1);
if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
// Yep, no additional predication needed. Perform the transform.
SDValue IID = N->getOperand(0);
SDValue Shift = N->getOperand(2);
SDValue Vec = Op1.getOperand(0);
SDValue Lane = Op1.getOperand(1);
EVT ResTy = N->getValueType(0);
EVT VecResTy;
SDLoc DL(N);
// The vector width should be 128 bits by the time we get here, even
// if it started as 64 bits (the extract_vector handling will have
// done so). Bail if it is not.
if (Vec.getValueSizeInBits() != 128)
return SDValue();
if (Vec.getValueType() == MVT::v4i32)
VecResTy = MVT::v4f32;
else if (Vec.getValueType() == MVT::v2i64)
VecResTy = MVT::v2f64;
else
return SDValue();
SDValue Convert =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
}
// AArch64 high-vector "long" operations are formed by performing the non-high
// version on an extract_subvector of each operand which gets the high half:
//
// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
//
// However, there are cases which don't have an extract_high explicitly, but
// have another operation that can be made compatible with one for free. For
// example:
//
// (dupv64 scalar) --> (extract_high (dup128 scalar))
//
// This routine does the actual conversion of such DUPs, once outer routines
// have determined that everything else is in order.
// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
// similarly here.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
MVT VT = N.getSimpleValueType();
if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N.getConstantOperandVal(1) == 0)
N = N.getOperand(0);
switch (N.getOpcode()) {
case AArch64ISD::DUP:
case AArch64ISD::DUPLANE8:
case AArch64ISD::DUPLANE16:
case AArch64ISD::DUPLANE32:
case AArch64ISD::DUPLANE64:
case AArch64ISD::MOVI:
case AArch64ISD::MOVIshift:
case AArch64ISD::MOVIedit:
case AArch64ISD::MOVImsl:
case AArch64ISD::MVNIshift:
case AArch64ISD::MVNImsl:
break;
default:
// FMOV could be supported, but isn't very useful, as it would only occur
// if you passed a bitcast floating-point immediate to an eligible long
// integer op (addl, smull, ...).
return SDValue();
}
if (!VT.is64BitVector())
return SDValue();
SDLoc DL(N);
unsigned NumElems = VT.getVectorNumElements();
if (N.getValueType().is64BitVector()) {
MVT ElementTy = VT.getVectorElementType();
MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
N = DAG.getNode(N->getOpcode(), DL, NewVT, N->ops());
}
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N,
DAG.getConstant(NumElems, DL, MVT::i64));
}
static bool isEssentiallyExtractHighSubvector(SDValue N) {
if (N.getOpcode() == ISD::BITCAST)
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
if (N.getOperand(0).getValueType().isScalableVector())
return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
/// Helper structure to keep track of ISD::SET_CC operands.
struct GenericSetCCInfo {
const SDValue *Opnd0;
const SDValue *Opnd1;
ISD::CondCode CC;
};
/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
struct AArch64SetCCInfo {
const SDValue *Cmp;
AArch64CC::CondCode CC;
};
/// Helper structure to keep track of SetCC information.
union SetCCInfo {
GenericSetCCInfo Generic;
AArch64SetCCInfo AArch64;
};
/// Helper structure to be able to read SetCC information. If the IsAArch64
/// field is set to true, Info is an AArch64SetCCInfo; otherwise Info is a
/// GenericSetCCInfo.
struct SetCCInfoAndKind {
SetCCInfo Info;
bool IsAArch64;
};
/// Check whether or not \p Op is a SET_CC operation, either a generic or
/// an AArch64 lowered one.
/// \p SetCCInfo is filled accordingly.
/// \post SetCCInfo is meaningful only when this function returns true.
/// \return True when Op is a kind of SET_CC operation.
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
// If this is a setcc, this is straightforward.
if (Op.getOpcode() == ISD::SETCC) {
SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SetCCInfo.IsAArch64 = false;
return true;
}
// Otherwise, check if this is a matching csel instruction.
// In other words:
// - csel 1, 0, cc
// - csel 0, 1, !cc
if (Op.getOpcode() != AArch64ISD::CSEL)
return false;
// Set the information about the operands.
// TODO: we want the operands of the Cmp not the csel
SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
SetCCInfo.IsAArch64 = true;
SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
// Check that the operands match the constraints:
// (1) Both operands must be constants.
// (2) One must be 1 and the other must be 0.
ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// Check (1).
if (!TValue || !FValue)
return false;
// Check (2).
if (!TValue->isOne()) {
// Update the comparison when we are interested in !cc.
std::swap(TValue, FValue);
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
return TValue->isOne() && FValue->isZero();
}
// Returns true if Op is setcc or zext of setcc.
static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
if (isSetCC(Op, Info))
return true;
return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
isSetCC(Op->getOperand(0), Info));
}
// The folding we want to perform is:
// (add x, [zext] (setcc cc ...) )
// -->
// (csel x, (add x, 1), !cc ...)
//
// The latter will get matched to a CSINC instruction.
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
SDValue LHS = Op->getOperand(0);
SDValue RHS = Op->getOperand(1);
SetCCInfoAndKind InfoAndKind;
// If both operands are a SET_CC, then we don't want to perform this
// folding and create another csel as this results in more instructions
// (and higher register usage).
if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
isSetCCOrZExtSetCC(RHS, InfoAndKind))
return SDValue();
// If neither operand is a SET_CC, give up.
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
std::swap(LHS, RHS);
if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
return SDValue();
}
// FIXME: This could be generalized to work for FP comparisons.
EVT CmpVT = InfoAndKind.IsAArch64
? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
: InfoAndKind.Info.Generic.Opnd0->getValueType();
if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
return SDValue();
SDValue CCVal;
SDValue Cmp;
SDLoc dl(Op);
if (InfoAndKind.IsAArch64) {
CCVal = DAG.getConstant(
AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
MVT::i32);
Cmp = *InfoAndKind.Info.AArch64.Cmp;
} else
Cmp = getAArch64Cmp(
*InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
dl);
EVT VT = Op->getValueType(0);
LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
}
// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
// Only handle scalar-integer-typed adds here.
if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
return SDValue();
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
SDValue Op2 = RHS->getOperand(0);
EVT OpVT1 = Op1.getValueType();
EVT OpVT2 = Op2.getValueType();
if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
Op2.getOpcode() != AArch64ISD::UADDV ||
OpVT1.getVectorElementType() != VT)
return SDValue();
SDValue Val1 = Op1.getOperand(0);
SDValue Val2 = Op2.getOperand(0);
EVT ValVT = Val1->getValueType(0);
SDLoc DL(N);
SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
DAG.getConstant(0, DL, MVT::i64));
}
/// Perform the scalar expression combine in the form of:
/// CSEL(c, 1, cc) + b => CSINC(b+c, b, cc)
/// CSNEG(c, -1, cc) + b => CSINC(b+c, b, cc)
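///
/// For example, (add (csel 5, 1, eq, flags), b) becomes
/// (csinc (add b, 5), b, eq, flags).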
static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// Handle commutativity.
if (LHS.getOpcode() != AArch64ISD::CSEL &&
LHS.getOpcode() != AArch64ISD::CSNEG) {
std::swap(LHS, RHS);
if (LHS.getOpcode() != AArch64ISD::CSEL &&
LHS.getOpcode() != AArch64ISD::CSNEG) {
return SDValue();
}
}
if (!LHS.hasOneUse())
return SDValue();
AArch64CC::CondCode AArch64CC =
static_cast<AArch64CC::CondCode>(LHS.getConstantOperandVal(2));
// The CSEL should include a constant one operand, and the CSNEG should
// include a one or negative-one operand.
ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(LHS.getOperand(0));
ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
if (!CTVal || !CFVal)
return SDValue();
if (!(LHS.getOpcode() == AArch64ISD::CSEL &&
(CTVal->isOne() || CFVal->isOne())) &&
!(LHS.getOpcode() == AArch64ISD::CSNEG &&
(CTVal->isOne() || CFVal->isAllOnes())))
return SDValue();
// Switch CSEL(1, c, cc) to CSEL(c, 1, !cc)
if (LHS.getOpcode() == AArch64ISD::CSEL && CTVal->isOne() &&
!CFVal->isOne()) {
std::swap(CTVal, CFVal);
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
SDLoc DL(N);
// Switch CSNEG(1, c, cc) to CSNEG(-c, -1, !cc)
if (LHS.getOpcode() == AArch64ISD::CSNEG && CTVal->isOne() &&
!CFVal->isAllOnes()) {
APInt C = -1 * CFVal->getAPIntValue();
CTVal = cast<ConstantSDNode>(DAG.getConstant(C, DL, VT));
CFVal = cast<ConstantSDNode>(DAG.getAllOnesConstant(DL, VT));
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
// It might be neutral for larger constants, as the immediate needs to be
// materialized in a register.
APInt ADDC = CTVal->getAPIntValue();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isLegalAddImmediate(ADDC.getSExtValue()))
return SDValue();
assert(((LHS.getOpcode() == AArch64ISD::CSEL && CFVal->isOne()) ||
(LHS.getOpcode() == AArch64ISD::CSNEG && CFVal->isAllOnes())) &&
"Unexpected constant value");
SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
SDValue Cmp = LHS.getOperand(3);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
}
// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
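// UDOT/SDOT take an accumulator as their first operand, so when the existing
// accumulator is known to be all zeros, the external add can take its place.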
static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (N->getOpcode() != ISD::ADD)
return SDValue();
SDValue Dot = N->getOperand(0);
SDValue A = N->getOperand(1);
// Handle commutativity.
auto isZeroDot = [](SDValue Dot) {
return (Dot.getOpcode() == AArch64ISD::UDOT ||
Dot.getOpcode() == AArch64ISD::SDOT) &&
isZerosVector(Dot.getOperand(0).getNode());
};
if (!isZeroDot(Dot))
std::swap(Dot, A);
if (!isZeroDot(Dot))
return SDValue();
return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
Dot.getOperand(2));
}
static bool isNegatedInteger(SDValue Op) {
return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
}
static SDValue getNegatedInteger(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Zero = DAG.getConstant(0, DL, VT);
return DAG.getNode(ISD::SUB, DL, VT, Zero, Op);
}
// Try to fold
//
// (neg (csel X, Y)) -> (csel (neg X), (neg Y))
//
// The folding helps csel to be matched with csneg without generating a
// redundant neg instruction, which includes the negation of the csel
// expansion of the abs node lowered by lowerABS.
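//
// For example (a sketch): given Y = (sub 0, Z),
//   (neg (csel X, Y, cc, flags)) -> (csel (neg X), Z, cc, flags)
// once the (neg (neg Z)) pair cancels, which isel can then match as a
// single csneg.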
static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
if (!isNegatedInteger(SDValue(N, 0)))
return SDValue();
SDValue CSel = N->getOperand(1);
if (CSel.getOpcode() != AArch64ISD::CSEL || !CSel->hasOneUse())
return SDValue();
SDValue N0 = CSel.getOperand(0);
SDValue N1 = CSel.getOperand(1);
// If neither of them is a negation, the fold is not worthwhile, as it
// would introduce two additional negations while removing only one.
if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
return SDValue();
SDValue N0N = getNegatedInteger(N0, DAG);
SDValue N1N = getNegatedInteger(N1, DAG);
SDLoc DL(N);
EVT VT = CSel.getValueType();
return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0N, N1N, CSel.getOperand(2),
CSel.getOperand(3));
}
// The basic add/sub long vector instructions have variants with "2" on the end
// which act on the high-half of their inputs. They are normally matched by
// patterns like:
//
// (add (zeroext (extract_high LHS)),
// (zeroext (extract_high RHS)))
// -> uaddl2 vD, vN, vM
//
// However, if one of the extracts is something like a duplicate, this
// instruction can still be used profitably. This function puts the DAG into a
// more appropriate form for those patterns to trigger.
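//
// For example (a sketch):
//   (add (zeroext (extract_high V)), (zeroext (dup s)))
// is rewritten so the dup becomes the high half of a 128-bit dup:
//   (add (zeroext (extract_high V)), (zeroext (extract_high (dup128 s))))
// which then matches the uaddl2 pattern above.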
static SDValue performAddSubLongCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
MVT VT = N->getSimpleValueType(0);
if (!VT.is128BitVector()) {
if (N->getOpcode() == ISD::ADD)
return performSetccAddFolding(N, DAG);
return SDValue();
}
// Make sure both branches are extended in the same way.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
LHS.getOpcode() != ISD::SIGN_EXTEND) ||
LHS.getOpcode() != RHS.getOpcode())
return SDValue();
unsigned ExtType = LHS.getOpcode();
// It's only worth doing if at least one of the inputs is already an
// extract, but we don't know which it'll be, so we have to try both.
if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
if (!RHS.getNode())
return SDValue();
RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
} else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
if (!LHS.getNode())
return SDValue();
LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
}
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
}
static bool isCMP(SDValue Op) {
return Op.getOpcode() == AArch64ISD::SUBS &&
!Op.getNode()->hasAnyUseOfValue(0);
}
// (CSEL 1 0 CC Cond) => CC
// (CSEL 0 1 CC Cond) => !CC
static Optional<AArch64CC::CondCode> getCSETCondCode(SDValue Op) {
if (Op.getOpcode() != AArch64ISD::CSEL)
return None;
auto CC = static_cast<AArch64CC::CondCode>(Op.getConstantOperandVal(2));
if (CC == AArch64CC::AL || CC == AArch64CC::NV)
return None;
SDValue OpLHS = Op.getOperand(0);
SDValue OpRHS = Op.getOperand(1);
if (isOneConstant(OpLHS) && isNullConstant(OpRHS))
return CC;
if (isNullConstant(OpLHS) && isOneConstant(OpRHS))
return getInvertedCondCode(CC);
return None;
}
// (ADC{S} l r (CMP (CSET HS carry) 1)) => (ADC{S} l r carry)
// (SBC{S} l r (CMP 0 (CSET LO carry))) => (SBC{S} l r carry)
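// The CMP merely re-materializes the carry that the inner CSET captured, so
// the ADC/SBC can consume the CSET's original flags operand directly.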
static SDValue foldOverflowCheck(SDNode *Op, SelectionDAG &DAG, bool IsAdd) {
SDValue CmpOp = Op->getOperand(2);
if (!isCMP(CmpOp))
return SDValue();
if (IsAdd) {
if (!isOneConstant(CmpOp.getOperand(1)))
return SDValue();
} else {
if (!isNullConstant(CmpOp.getOperand(0)))
return SDValue();
}
SDValue CsetOp = CmpOp->getOperand(IsAdd ? 0 : 1);
auto CC = getCSETCondCode(CsetOp);
if (CC != (IsAdd ? AArch64CC::HS : AArch64CC::LO))
return SDValue();
return DAG.getNode(Op->getOpcode(), SDLoc(Op), Op->getVTList(),
Op->getOperand(0), Op->getOperand(1),
CsetOp.getOperand(3));
}
// (ADC x 0 cond) => (CINC x HS cond)
static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Cond = N->getOperand(2);
if (!isNullConstant(RHS))
return SDValue();
EVT VT = N->getValueType(0);
SDLoc DL(N);
// (CINC x cc cond) <=> (CSINC x x !cc cond)
SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32);
return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond);
}
// Transform vector add(zext i8 to i32, zext i8 to i32)
// into sext(add(zext(i8 to i16), zext(i8 to i16)) to i32)
// This allows extra uses of saddl/uaddl at the lower vector widths, and
// fewer extends.
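// For example (illustrative): in v16i32 add(zext v16i8 a, zext v16i8 b),
// each extended lane is at most 255, so each i16 sum is at most 510 and
// non-negative; sign-extending the narrower add back to i32 is lossless.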
static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
if (!VT.isFixedLengthVector() || VT.getSizeInBits() <= 128 ||
(N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND) ||
(N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND) ||
N->getOperand(0).getOperand(0).getValueType() !=
N->getOperand(1).getOperand(0).getValueType())
return SDValue();
SDValue N0 = N->getOperand(0).getOperand(0);
SDValue N1 = N->getOperand(1).getOperand(0);
EVT InVT = N0.getValueType();
EVT S1 = InVT.getScalarType();
EVT S2 = VT.getScalarType();
if ((S2 == MVT::i32 && S1 == MVT::i8) ||
(S2 == MVT::i64 && (S1 == MVT::i8 || S1 == MVT::i16))) {
SDLoc DL(N);
EVT HalfVT = EVT::getVectorVT(*DAG.getContext(),
S2.getHalfSizedIntegerVT(*DAG.getContext()),
VT.getVectorElementCount());
SDValue NewN0 = DAG.getNode(N->getOperand(0).getOpcode(), DL, HalfVT, N0);
SDValue NewN1 = DAG.getNode(N->getOperand(1).getOpcode(), DL, HalfVT, N1);
SDValue NewOp = DAG.getNode(N->getOpcode(), DL, HalfVT, NewN0, NewN1);
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewOp);
}
return SDValue();
}
static SDValue performBuildVectorCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
// A build vector of two extracted elements is equivalent to an
// extract subvector where the inner vector is any-extended to the
// extract_vector_elt VT.
// (build_vector (extract_elt_iXX_to_i32 vec Idx+0)
// (extract_elt_iXX_to_i32 vec Idx+1))
// => (extract_subvector (anyext_iXX_to_i32 vec) Idx)
// For now, only consider the v2i32 case, which arises as a result of
// legalization.
if (N->getValueType(0) != MVT::v2i32)
return SDValue();
SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
// Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
// Constant index.
isa<ConstantSDNode>(Elt0->getOperand(1)) &&
isa<ConstantSDNode>(Elt1->getOperand(1)) &&
// Both EXTRACT_VECTOR_ELT from same vector...
Elt0->getOperand(0) == Elt1->getOperand(0) &&
// ... and contiguous. First element's index +1 == second element's index.
Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1)) {
SDValue VecToExtend = Elt0->getOperand(0);
EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32);
if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
return SDValue();
SDValue SubvectorIdx = DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Ext,
SubvectorIdx);
}
return SDValue();
}
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// Try to change sum of two reductions.
if (SDValue Val = performAddUADDVCombine(N, DAG))
return Val;
if (SDValue Val = performAddDotCombine(N, DAG))
return Val;
if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
return Val;
if (SDValue Val = performNegCSelCombine(N, DAG))
return Val;
if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
return Val;
return performAddSubLongCombine(N, DCI, DAG);
}
// Massage DAGs which we can use the high-half "long" operations on into
// something isel will recognize better. E.g.
//
// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
// (aarch64_neon_umull (extract_high (v2i64 vec)))
// (extract_high (v2i64 (dup128 scalar)))))
//
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
assert(LHS.getValueType().is64BitVector() &&
RHS.getValueType().is64BitVector() &&
"unexpected shape for long operation");
// Either node could be a DUP, but it's not worth doing both of them (you'd
// just as well use the non-high version) so look for a corresponding extract
// operation on the other "wing".
if (isEssentiallyExtractHighSubvector(LHS)) {
RHS = tryExtendDUPToExtractHigh(RHS, DAG);
if (!RHS.getNode())
return SDValue();
} else if (isEssentiallyExtractHighSubvector(RHS)) {
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
}
if (IID == Intrinsic::not_intrinsic)
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
N->getOperand(0), LHS, RHS);
}
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
MVT ElemTy = N->getSimpleValueType(0).getScalarType();
unsigned ElemBits = ElemTy.getSizeInBits();
int64_t ShiftAmount;
if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, ElemBits) ||
SplatBitSize != ElemBits)
return SDValue();
ShiftAmount = SplatValue.getSExtValue();
} else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
ShiftAmount = CVN->getSExtValue();
} else
return SDValue();
unsigned Opcode;
bool IsRightShift;
switch (IID) {
default:
llvm_unreachable("Unknown shift intrinsic");
case Intrinsic::aarch64_neon_sqshl:
Opcode = AArch64ISD::SQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_uqshl:
Opcode = AArch64ISD::UQSHL_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_srshl:
Opcode = AArch64ISD::SRSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_urshl:
Opcode = AArch64ISD::URSHR_I;
IsRightShift = true;
break;
case Intrinsic::aarch64_neon_sqshlu:
Opcode = AArch64ISD::SQSHLU_I;
IsRightShift = false;
break;
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
// For positive shift amounts we can use SHL, as ushl/sshl perform a regular
// left shift for positive shift amounts. Below, we only replace the current
// node with VSHL if this condition is met.
Opcode = AArch64ISD::VSHL;
IsRightShift = false;
break;
}
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(-ShiftAmount, dl, MVT::i32));
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
SDLoc dl(N);
return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
DAG.getConstant(ShiftAmount, dl, MVT::i32));
}
return SDValue();
}
// The CRC32[BH] instructions ignore the high bits of their data operand. Since
// the intrinsics must be legal and take an i32, this means there's almost
// certainly going to be a zext in the DAG which we can eliminate.
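// For example: (crc32b x, (and y, 0xff)) -> (crc32b x, y), because only the
// low byte of the data operand is observed either way.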
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
SDValue AndN = N->getOperand(2);
if (AndN.getOpcode() != ISD::AND)
return SDValue();
ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
if (!CMask || CMask->getZExtValue() != Mask)
return SDValue();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
}
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
SelectionDAG &DAG) {
SDLoc dl(N);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
DAG.getNode(Opc, dl,
N->getOperand(1).getSimpleValueType(),
N->getOperand(1)),
DAG.getConstant(0, dl, MVT::i64));
}
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op1 = N->getOperand(1);
SDValue Op2 = N->getOperand(2);
EVT ScalarTy = Op2.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
// Lower index_vector(base, step) to mul(step, step_vector(1)) + splat(base).
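// For example (illustrative): index_vector(3, 2) over nxv4i32 yields
// {0,1,2,...} * splat(2) + splat(3) = {3,5,7,...}.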
SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
}
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
SDValue Scalar = N->getOperand(3);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
SDValue Passthru = N->getOperand(1);
SDValue Pred = N->getOperand(2);
return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
Pred, Scalar, Passthru);
}
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
LLVMContext &Ctx = *DAG.getContext();
EVT VT = N->getValueType(0);
assert(VT.isScalableVector() && "Expected a scalable vector.");
// Current lowering only supports the SVE-ACLE types.
if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
return SDValue();
unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
EVT ByteVT =
EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
// Convert everything to the domain of EXT (i.e. bytes).
SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
DAG.getConstant(ElemSize, dl, MVT::i32));
SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
}
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize())
return SDValue();
SDValue Comparator = N->getOperand(3);
if (Comparator.getOpcode() == AArch64ISD::DUP ||
Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
unsigned IID = getIntrinsicID(N);
EVT VT = N->getValueType(0);
EVT CmpVT = N->getOperand(2).getValueType();
SDValue Pred = N->getOperand(1);
SDValue Imm;
SDLoc DL(N);
switch (IID) {
default:
llvm_unreachable("Called with wrong intrinsic!");
break;
// Signed comparisons
case Intrinsic::aarch64_sve_cmpeq_wide:
case Intrinsic::aarch64_sve_cmpne_wide:
case Intrinsic::aarch64_sve_cmpge_wide:
case Intrinsic::aarch64_sve_cmpgt_wide:
case Intrinsic::aarch64_sve_cmplt_wide:
case Intrinsic::aarch64_sve_cmple_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
int64_t ImmVal = CN->getSExtValue();
if (ImmVal >= -16 && ImmVal <= 15)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
// Unsigned comparisons
case Intrinsic::aarch64_sve_cmphs_wide:
case Intrinsic::aarch64_sve_cmphi_wide:
case Intrinsic::aarch64_sve_cmplo_wide:
case Intrinsic::aarch64_sve_cmpls_wide: {
if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
uint64_t ImmVal = CN->getZExtValue();
if (ImmVal <= 127)
Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
else
return SDValue();
}
break;
}
}
if (!Imm)
return SDValue();
SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
N->getOperand(2), Splat, DAG.getCondCode(CC));
}
return SDValue();
}
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
AArch64CC::CondCode Cond) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc DL(Op);
assert(Op.getValueType().isScalableVector() &&
TLI.isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
assert(Op.getValueType() == Pg.getValueType() &&
"Expected same type for PTEST operands");
// Ensure target-specific opcodes use a legal type.
EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue TVal = DAG.getConstant(1, DL, OutVT);
SDValue FVal = DAG.getConstant(0, DL, OutVT);
// Ensure operands have type nxv16i1.
if (Op.getValueType() != MVT::nxv16i1) {
if ((Cond == AArch64CC::ANY_ACTIVE || Cond == AArch64CC::NONE_ACTIVE) &&
isZeroingInactiveLanes(Op))
Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Pg);
else
Pg = getSVEPredicateBitCast(MVT::nxv16i1, Pg, DAG);
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, Op);
}
// Set condition code (CC) flags.
SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
// Convert CC to integer based on requested condition.
// NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
return DAG.getZExtOrTrunc(Res, DL, VT);
}
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
// NOTE: The integer reduction's result type is not always linked to the
// operand's element type, so we construct it from the intrinsic's result
// type.
EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue VecToReduce = N->getOperand(2);
EVT ReduceVT = VecToReduce.getValueType();
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Pred = N->getOperand(1);
SDValue InitVal = N->getOperand(2);
SDValue VecToReduce = N->getOperand(3);
EVT ReduceVT = VecToReduce.getValueType();
// Ordered reductions use the first lane of the result vector as the
// reduction's initial value.
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
DAG.getUNDEF(ReduceVT), InitVal, Zero);
SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
// SVE reductions set the whole vector register with the first element
// containing the reduction result, which we'll now extract.
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
Zero);
}
static bool isAllInactivePredicate(SDValue N) {
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
N = N.getOperand(0);
return ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
unsigned NumElts = N.getValueType().getVectorMinNumElements();
// Look through cast.
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
N = N.getOperand(0);
// When reinterpreting from a type with fewer elements the "new" elements
// are not active, so bail if they're likely to be used.
if (N.getValueType().getVectorMinNumElements() < NumElts)
return false;
}
if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
return true;
// "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
// or smaller than the implicit element type represented by N.
// NOTE: A larger element count implies a smaller element type.
if (N.getOpcode() == AArch64ISD::PTRUE &&
N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
return N.getValueType().getVectorMinNumElements() >= NumElts;
// If we're compiling for a specific vector-length, we can check if the
// pattern's VL equals that of the scalable vector at runtime.
if (N.getOpcode() == AArch64ISD::PTRUE) {
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize) {
unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
unsigned PatNumElts =
getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
return PatNumElts == (NumElts * VScale);
}
}
return false;
}
// If a merged operation has no inactive lanes we can relax it to a predicated
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
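//
// For example (illustrative): sve.add(pg, a, b) with an all-active pg becomes
// a plain ISD::ADD, while sve.mul(pg, a, b) becomes MUL_PRED; with no
// inactive lanes, no result needs to be merged from the first operand.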
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
SelectionDAG &DAG, bool UnpredOp = false,
bool SwapOperands = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
// Recognize the ISD ways of specifying an all-active predicate.
if (isAllActivePredicate(DAG, Pg)) {
if (UnpredOp)
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
}
// FUTURE: SplatVector(true)
return SDValue();
}
static SDValue performIntrinsicCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
unsigned IID = getIntrinsicID(N);
switch (IID) {
default:
break;
case Intrinsic::get_active_lane_mask: {
SDValue Res = SDValue();
EVT VT = N->getValueType(0);
if (VT.isFixedLengthVector()) {
// We can use the SVE whilelo instruction to lower this intrinsic by
// creating the appropriate sequence of scalable vector operations and
// then extracting a fixed-width subvector from the scalable vector.
SDLoc DL(N);
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, MVT::i64);
EVT WhileVT = EVT::getVectorVT(
*DAG.getContext(), MVT::i1,
ElementCount::getScalable(VT.getVectorNumElements()));
// Get promoted scalable vector VT, i.e. promote nxv4i1 -> nxv4i32.
EVT PromVT = getPromotedVTForPredicate(WhileVT);
// Get the fixed-width equivalent of PromVT for extraction.
EVT ExtVT =
EVT::getVectorVT(*DAG.getContext(), PromVT.getVectorElementType(),
VT.getVectorElementCount());
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WhileVT, ID,
N->getOperand(1), N->getOperand(2));
Res = DAG.getNode(ISD::SIGN_EXTEND, DL, PromVT, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, Res,
DAG.getConstant(0, DL, MVT::i64));
Res = DAG.getNode(ISD::TRUNCATE, DL, VT, Res);
}
return Res;
}
case Intrinsic::aarch64_neon_vcvtfxs2fp:
case Intrinsic::aarch64_neon_vcvtfxu2fp:
return tryCombineFixedPointConvert(N, DCI, DAG);
case Intrinsic::aarch64_neon_saddv:
return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
case Intrinsic::aarch64_neon_uaddv:
return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
case Intrinsic::aarch64_neon_sminv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
case Intrinsic::aarch64_neon_uminv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
case Intrinsic::aarch64_neon_smaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
case Intrinsic::aarch64_neon_umaxv:
return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
case Intrinsic::aarch64_neon_fmax:
return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmin:
return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fmaxnm:
return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_fminnm:
return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_smull:
return DAG.getNode(AArch64ISD::SMULL, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_umull:
return DAG.getNode(AArch64ISD::UMULL, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_neon_pmull:
case Intrinsic::aarch64_neon_sqdmull:
return tryCombineLongOpWithDup(IID, N, DCI, DAG);
case Intrinsic::aarch64_neon_sqshl:
case Intrinsic::aarch64_neon_uqshl:
case Intrinsic::aarch64_neon_sqshlu:
case Intrinsic::aarch64_neon_srshl:
case Intrinsic::aarch64_neon_urshl:
case Intrinsic::aarch64_neon_sshl:
case Intrinsic::aarch64_neon_ushl:
return tryCombineShiftImm(IID, N, DAG);
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
return tryCombineCRC32(0xff, N, DAG);
case Intrinsic::aarch64_crc32h:
case Intrinsic::aarch64_crc32ch:
return tryCombineCRC32(0xffff, N, DAG);
case Intrinsic::aarch64_sve_saddv:
// There is no i64 version of SADDV because the sign is irrelevant.
if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
else
return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
case Intrinsic::aarch64_sve_uaddv:
return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
case Intrinsic::aarch64_sve_smaxv:
return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_umaxv:
return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_sminv:
return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
case Intrinsic::aarch64_sve_uminv:
return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
case Intrinsic::aarch64_sve_orv:
return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
case Intrinsic::aarch64_sve_eorv:
return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
case Intrinsic::aarch64_sve_andv:
return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
case Intrinsic::aarch64_sve_index:
return LowerSVEIntrinsicIndex(N, DAG);
case Intrinsic::aarch64_sve_dup:
return LowerSVEIntrinsicDUP(N, DAG);
case Intrinsic::aarch64_sve_dup_x:
return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
case Intrinsic::aarch64_sve_mul:
return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
case Intrinsic::aarch64_sve_smulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
case Intrinsic::aarch64_sve_umulh:
return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
case Intrinsic::aarch64_sve_smin:
return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin:
return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smax:
return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax:
return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_lsl:
return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr:
return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_asr:
return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_fadd:
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
case Intrinsic::aarch64_sve_fsub:
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
case Intrinsic::aarch64_sve_fmul:
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
case Intrinsic::aarch64_sve_add:
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_subr:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
case Intrinsic::aarch64_sve_eor:
return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
case Intrinsic::aarch64_sve_orr:
return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
case Intrinsic::aarch64_sve_sabd:
return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
case Intrinsic::aarch64_sve_uabd:
return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
case Intrinsic::aarch64_sve_sqsub:
return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
case Intrinsic::aarch64_sve_uqadd:
return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
case Intrinsic::aarch64_sve_uqsub:
return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
case Intrinsic::aarch64_sve_sqadd_x:
return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_sqsub_x:
return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqadd_x:
return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_uqsub_x:
return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_sve_asrd:
return DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmphs:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
break;
case Intrinsic::aarch64_sve_cmphi:
if (!N->getOperand(2).getValueType().isFloatingPoint())
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
break;
case Intrinsic::aarch64_sve_fcmpge:
case Intrinsic::aarch64_sve_cmpge:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGE));
break;
case Intrinsic::aarch64_sve_fcmpgt:
case Intrinsic::aarch64_sve_cmpgt:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETGT));
break;
case Intrinsic::aarch64_sve_fcmpeq:
case Intrinsic::aarch64_sve_cmpeq:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
break;
case Intrinsic::aarch64_sve_fcmpne:
case Intrinsic::aarch64_sve_cmpne:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETNE));
break;
case Intrinsic::aarch64_sve_fcmpuo:
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
N->getValueType(0), N->getOperand(1), N->getOperand(2),
N->getOperand(3), DAG.getCondCode(ISD::SETUO));
break;
case Intrinsic::aarch64_sve_fadda:
return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
case Intrinsic::aarch64_sve_faddv:
return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxnmv:
return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fmaxv:
return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
case Intrinsic::aarch64_sve_fminnmv:
return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
case Intrinsic::aarch64_sve_fminv:
return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
case Intrinsic::aarch64_sve_sel:
return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_cmpeq_wide:
return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
case Intrinsic::aarch64_sve_cmpne_wide:
return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpge_wide:
return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmpgt_wide:
return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplt_wide:
return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
case Intrinsic::aarch64_sve_cmple_wide:
return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphs_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
case Intrinsic::aarch64_sve_cmphi_wide:
return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
case Intrinsic::aarch64_sve_cmplo_wide:
return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
case Intrinsic::aarch64_sve_cmpls_wide:
return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
case Intrinsic::aarch64_sve_ptest_any:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::ANY_ACTIVE);
case Intrinsic::aarch64_sve_ptest_first:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::FIRST_ACTIVE);
case Intrinsic::aarch64_sve_ptest_last:
return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
AArch64CC::LAST_ACTIVE);
}
return SDValue();
}
static bool isCheapToExtend(const SDValue &N) {
unsigned OC = N->getOpcode();
return OC == ISD::LOAD || OC == ISD::MLOAD ||
ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static SDValue
performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we have (sext (setcc A B)) and A and B are cheap to extend,
// we can move the sext into the arguments and have the same result. For
// example, if A and B are both loads, we can make those extending loads and
// avoid an extra instruction. This pattern appears often in VLS code
// generation where the inputs to the setcc have a different size from the
// instruction that wants to use the result of the setcc.
assert(N->getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOpcode() == ISD::SETCC);
const SDValue SetCC = N->getOperand(0);
const SDValue CCOp0 = SetCC.getOperand(0);
const SDValue CCOp1 = SetCC.getOperand(1);
if (!CCOp0->getValueType(0).isInteger() ||
!CCOp1->getValueType(0).isInteger())
return SDValue();
ISD::CondCode Code =
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
ISD::NodeType ExtType =
isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (isCheapToExtend(SetCC.getOperand(0)) &&
isCheapToExtend(SetCC.getOperand(1))) {
const SDValue Ext1 =
DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
const SDValue Ext2 =
DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
return DAG.getSetCC(
SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
}
return SDValue();
}
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
// we can convert that DUP into another extract_high (of a bigger DUP), which
// helps the backend to decide that an sabdl2 would be useful, saving a real
// extract_high operation.
if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
(N->getOperand(0).getOpcode() == ISD::ABDU ||
N->getOperand(0).getOpcode() == ISD::ABDS)) {
SDNode *ABDNode = N->getOperand(0).getNode();
SDValue NewABD =
tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
if (!NewABD.getNode())
return SDValue();
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
if (N->getValueType(0).isFixedLengthVector() &&
N->getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOpcode() == ISD::SETCC)
return performSignExtendSetCCCombine(N, DCI, DAG);
return SDValue();
}
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
SDValue SplatVal, unsigned NumVecElts) {
assert(!St.isTruncatingStore() && "cannot split truncating vector store");
Align OrigAlignment = St.getAlign();
unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
// Create scalar stores. This is at least as good as the code sequence for a
// split unaligned store which is a dup.s, ext.b, and two stores.
// Most of the time the three stores should be replaced by store pair
// instructions (stp).
SDLoc DL(&St);
SDValue BasePtr = St.getBasePtr();
uint64_t BaseOffset = 0;
const MachinePointerInfo &PtrInfo = St.getPointerInfo();
SDValue NewST1 =
DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
OrigAlignment, St.getMemOperand()->getFlags());
// As this is in ISel, we will not merge this add, which may degrade results.
if (BasePtr->getOpcode() == ISD::ADD &&
isa<ConstantSDNode>(BasePtr->getOperand(1))) {
BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
BasePtr = BasePtr->getOperand(0);
}
unsigned Offset = EltOffset;
while (--NumVecElts) {
Align Alignment = commonAlignment(OrigAlignment, Offset);
SDValue OffsetPtr =
DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
PtrInfo.getWithOffset(Offset), Alignment,
St.getMemOperand()->getFlags());
Offset += EltOffset;
}
return NewST1;
}
// Returns an SVE type that ContentTy can be trivially sign or zero extended
// into.
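//
// For example: an nxv2i16 value holds one 16-bit element per 64-bit lane, so
// its natural container is nxv2i64.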
static MVT getSVEContainerType(EVT ContentTy) {
assert(ContentTy.isSimple() && "No SVE containers for extended types");
switch (ContentTy.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("No known SVE container for this MVT type");
case MVT::nxv2i8:
case MVT::nxv2i16:
case MVT::nxv2i32:
case MVT::nxv2i64:
case MVT::nxv2f32:
case MVT::nxv2f64:
return MVT::nxv2i64;
case MVT::nxv4i8:
case MVT::nxv4i16:
case MVT::nxv4i32:
case MVT::nxv4f32:
return MVT::nxv4i32;
case MVT::nxv8i8:
case MVT::nxv8i16:
case MVT::nxv8f16:
case MVT::nxv8bf16:
return MVT::nxv8i16;
case MVT::nxv16i8:
return MVT::nxv16i8;
}
}
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
EVT ContainerVT = VT;
if (ContainerVT.isInteger())
ContainerVT = getSVEContainerType(ContainerVT);
SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
SDValue Ops[] = { N->getOperand(0), // Chain
N->getOperand(2), // Pg
N->getOperand(3), // Base
DAG.getValueType(VT) };
SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (ContainerVT.isInteger() && (VT != ContainerVT))
Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
return DAG.getMergeValues({ Load, LoadChain }, DL);
}
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT PtrTy = N->getOperand(3).getValueType();
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
auto *MINode = cast<MemIntrinsicSDNode>(N);
SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
MINode->getOperand(3), DAG.getUNDEF(PtrTy),
MINode->getOperand(2), PassThru,
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
if (VT.isFloatingPoint()) {
SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
return DAG.getMergeValues(Ops, DL);
}
return L;
}
template <unsigned Opcode>
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
"Unsupported opcode.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
EVT LoadVT = VT;
if (VT.isFloatingPoint())
LoadVT = VT.changeTypeToInteger();
SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (VT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT HwSrcVt = getSVEContainerType(DataVT);
SDValue InputVT = DAG.getValueType(DataVT);
if (DataVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDValue SrcNew;
if (Data.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
SDValue Ops[] = { N->getOperand(0), // Chain
SrcNew,
N->getOperand(4), // Base
N->getOperand(3), // Pg
InputVT
};
return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
}
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Data = N->getOperand(2);
EVT DataVT = Data.getValueType();
EVT PtrTy = N->getOperand(4).getValueType();
if (DataVT.isFloatingPoint())
Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
auto *MINode = cast<MemIntrinsicSDNode>(N);
return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
DAG.getUNDEF(PtrTy), MINode->getOperand(3),
MINode->getMemoryVT(), MINode->getMemOperand(),
ISD::UNINDEXED, false, false);
}
/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
/// load store optimizer pass will merge them to store pair stores. This should
/// be better than a movi to create the vector zero followed by a vector store
/// if the zero constant is not re-used, since one instruction and one register
/// live range will be removed.
///
/// For example, the final generated code should be:
///
/// stp xzr, xzr, [x0]
///
/// instead of:
///
/// movi v0.2d, #0
/// str q0, [x0]
///
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Avoid scalarizing zero splat stores for scalable vectors.
if (VT.isScalableVector())
return SDValue();
// It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
// 2, 3 or 4 i32 elements.
int NumVecElts = VT.getVectorNumElements();
if (!(((NumVecElts == 2 || NumVecElts == 3) &&
VT.getVectorElementType().getSizeInBits() == 64) ||
((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
VT.getVectorElementType().getSizeInBits() == 32)))
return SDValue();
if (StVal.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
// If the zero constant has more than one use then the vector store could be
// better, since the constant mov will be amortized and stp q instructions
// should be formed.
if (!StVal.hasOneUse())
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// If the immediate offset of the address operand is too large for the stp
// instruction, then bail out.
if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
if (Offset < -512 || Offset > 504)
return SDValue();
}
for (int I = 0; I < NumVecElts; ++I) {
SDValue EltVal = StVal.getOperand(I);
if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
return SDValue();
}
// Use a CopyFromReg WZR/XZR here to prevent
// DAGCombiner::MergeConsecutiveStores from undoing this transformation.
SDLoc DL(&St);
unsigned ZeroReg;
EVT ZeroVT;
if (VT.getVectorElementType().getSizeInBits() == 32) {
ZeroReg = AArch64::WZR;
ZeroVT = MVT::i32;
} else {
ZeroReg = AArch64::XZR;
ZeroVT = MVT::i64;
}
SDValue SplatVal =
DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
/// value. The load store optimizer pass will merge them to store pair stores.
/// This has better performance than a splat of the scalar followed by a split
/// vector store. Even if the stores are not merged it is four stores vs a dup,
/// followed by an ext.b and two stores.
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Don't replace floating point stores; they may not be transformed to
// stp because of the store pair suppress pass.
if (VT.isFloatingPoint())
return SDValue();
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
// If the store is truncating then it's going down to i16 or smaller, which
// means it can be implemented in a single store anyway.
if (St.isTruncatingStore())
return SDValue();
// Check that this is a splat.
// Make sure that each of the relevant vector element locations are inserted
// to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
SDValue SplatVal;
for (unsigned I = 0; I < NumVecElts; ++I) {
// Check for insert vector elements.
if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
// Check that same value is inserted at each vector element.
if (I == 0)
SplatVal = StVal.getOperand(1);
else if (StVal.getOperand(1) != SplatVal)
return SDValue();
// Check insert element index.
ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
if (!CIndex)
return SDValue();
uint64_t IndexVal = CIndex->getZExtValue();
if (IndexVal >= NumVecElts)
return SDValue();
IndexNotInserted.reset(IndexVal);
StVal = StVal.getOperand(0);
}
// Check that all vector element locations were inserted to.
if (IndexNotInserted.any())
return SDValue();
return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *S = cast<StoreSDNode>(N);
if (S->isVolatile() || S->isIndexed())
return SDValue();
SDValue StVal = S->getValue();
EVT VT = StVal.getValueType();
if (!VT.isFixedLengthVector())
return SDValue();
// If we get a splat of zeros, convert this vector store to a store of
// scalars. They will be merged into store pairs of xzr, thereby removing one
// instruction and one register.
if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
return ReplacedZeroSplat;
// FIXME: The logic for deciding if an unaligned store should be split should
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
// those up regresses performance on micro-benchmarks and olden/bh.
if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
return SDValue();
// Split unaligned 16B stores. They are terrible for performance.
// Don't split stores with alignment of 1 or 2. Code that uses clang vector
// extensions can use this to mark that it does not want splitting to happen
// (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
// eliminating alignment hazards is only 1 in 8 for alignment of 2.
if (VT.getSizeInBits() != 128 || S->getAlign() >= Align(16) ||
S->getAlign() <= Align(2))
return SDValue();
// If we get a splat of a scalar, convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
return ReplacedSplat;
SDLoc DL(S);
// Split VT into two.
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
unsigned NumElts = HalfVT.getVectorNumElements();
SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(0, DL, MVT::i64));
SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
DAG.getConstant(NumElts, DL, MVT::i64));
SDValue BasePtr = S->getBasePtr();
SDValue NewST1 =
DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
S->getAlign(), S->getMemOperand()->getFlags());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(8, DL, MVT::i64));
return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
S->getPointerInfo(), S->getAlign(),
S->getMemOperand()->getFlags());
}
static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
// splice(pg, op1, undef) -> op1
if (N->getOperand(2).isUndef())
return N->getOperand(1);
return SDValue();
}
static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
N->getOpcode() == AArch64ISD::UUNPKLO) &&
"Unexpected Opcode!");
// uunpklo/hi undef -> undef
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(N->getValueType(0));
// If this is a masked load followed by an UUNPKLO, fold this into a masked
// extending load. We can do this even if this is already a masked
// {z,}extload.
if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
N->getOpcode() == AArch64ISD::UUNPKLO) {
MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
SDValue Mask = MLD->getMask();
SDLoc DL(N);
if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE &&
(MLD->getPassThru()->isUndef() ||
isZerosVector(MLD->getPassThru().getNode()))) {
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned PgPattern = Mask->getConstantOperandVal(0);
EVT VT = N->getValueType(0);
// Ensure we can double the size of the predicate pattern.
unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
if (NumElts &&
NumElts * VT.getVectorElementType().getSizeInBits() <= MinSVESize) {
Mask =
getPTrue(DAG, DL, VT.changeVectorElementType(MVT::i1), PgPattern);
SDValue PassThru = DAG.getConstant(0, DL, VT);
SDValue NewLoad = DAG.getMaskedLoad(
VT, DL, MLD->getChain(), MLD->getBasePtr(), MLD->getOffset(), Mask,
PassThru, MLD->getMemoryVT(), MLD->getMemOperand(),
MLD->getAddressingMode(), ISD::ZEXTLOAD);
DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), NewLoad.getValue(1));
return NewLoad;
}
}
}
return SDValue();
}
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
EVT ResVT = N->getValueType(0);
// uzp1(x, undef) -> concat(truncate(x), undef)
if (Op1.getOpcode() == ISD::UNDEF) {
EVT BCVT = MVT::Other, HalfVT = MVT::Other;
switch (ResVT.getSimpleVT().SimpleTy) {
default:
break;
case MVT::v16i8:
BCVT = MVT::v8i16;
HalfVT = MVT::v8i8;
break;
case MVT::v8i16:
BCVT = MVT::v4i32;
HalfVT = MVT::v4i16;
break;
case MVT::v4i32:
BCVT = MVT::v2i64;
HalfVT = MVT::v2i32;
break;
}
if (BCVT != MVT::Other) {
SDValue BC = DAG.getBitcast(BCVT, Op0);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, BC);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Trunc,
DAG.getUNDEF(HalfVT));
}
}
// uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue X = Op0.getOperand(0).getOperand(0);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
}
}
// uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
SDValue Z = Op1.getOperand(0).getOperand(1);
return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
}
}
return SDValue();
}
static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
(Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
"Invalid opcode.");
const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Pg = N->getOperand(1);
SDValue Base = N->getOperand(2);
SDValue Offset = N->getOperand(3);
SDValue Ty = N->getOperand(4);
EVT ResVT = N->getValueType(0);
const auto OffsetOpc = Offset.getOpcode();
const bool OffsetIsZExt =
OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
const bool OffsetIsSExt =
OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
// Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
SDValue ExtPg = Offset.getOperand(0);
VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
// If the predicate for the sign- or zero-extended offset is the
// same as the predicate used for this load and the sign-/zero-extension
// was from 32 bits...
if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
SDValue UnextendedOffset = Offset.getOperand(1);
unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
if (Signed)
NewOpc = getSignExtendedGatherOpcode(NewOpc);
return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
{Chain, Pg, Base, UnextendedOffset, Ty});
}
}
return SDValue();
}
/// Optimize a vector shift instruction and its operand if shifted out
/// bits are not used.
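///
/// For example (illustrative): in (VLSHR (add x, y), 8) on v8i16, the low 8
/// bits of the add are shifted out, so SimplifyDemandedBits can simplify the
/// operands knowing only the high bits are demanded.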
static SDValue performVectorShiftCombine(SDNode *N,
const AArch64TargetLowering &TLI,
TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::VASHR ||
N->getOpcode() == AArch64ISD::VLSHR);
SDValue Op = N->getOperand(0);
unsigned OpScalarSize = Op.getScalarValueSizeInBits();
unsigned ShiftImm = N->getConstantOperandVal(1);
assert(OpScalarSize > ShiftImm && "Invalid shift imm");
APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
APInt DemandedMask = ~ShiftedOutBits;
if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
return SDValue(N, 0);
return SDValue();
}
static SDValue performSunpkloCombine(SDNode *N, SelectionDAG &DAG) {
// sunpklo(sext(pred)) -> sext(extract_low_half(pred))
// This transform works in partnership with performSetCCPunpkCombine to
// remove unnecessary transfer of predicates into standard registers and back
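// Illustrative example (editorial note, assumed types):
// (nxv2i64 sunpklo (nxv4i32 sign_extend (nxv4i1 p)))
// -> (nxv2i64 sign_extend (nxv2i1 extract_subvector (p, 0)))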
if (N->getOperand(0).getOpcode() == ISD::SIGN_EXTEND &&
N->getOperand(0)->getOperand(0)->getValueType(0).getScalarType() ==
MVT::i1) {
SDValue CC = N->getOperand(0)->getOperand(0);
auto VT = CC->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
SDValue Unpk = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, CC,
DAG.getVectorIdxConstant(0, SDLoc(N)));
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Unpk);
}
return SDValue();
}
/// Target-specific DAG combine function for post-increment LD1 (lane) and
/// post-increment LD1R.
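/// For example (editorial illustration): a (v4i32 insert_vector_elt v,
/// (load x), lane) whose address x is also incremented by the element size
/// (add x, #4) can become a single LD1LANEpost node that loads, inserts and
/// writes back x + 4.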
static SDValue performPostLD1Combine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
bool IsLaneOp) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (!VT.is128BitVector() && !VT.is64BitVector())
return SDValue();
unsigned LoadIdx = IsLaneOp ? 1 : 0;
SDNode *LD = N->getOperand(LoadIdx).getNode();
// If it is not a LOAD node, we cannot do this combine.
if (LD->getOpcode() != ISD::LOAD)
return SDValue();
// The vector lane must be a constant in the LD1LANE opcode.
SDValue Lane;
if (IsLaneOp) {
Lane = N->getOperand(2);
auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
}
LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
EVT MemVT = LoadSDN->getMemoryVT();
// Check if memory operand is the same type as the vector element.
if (MemVT != VT.getVectorElementType())
return SDValue();
// Check if there are other uses. If so, do not combine as it will introduce
// an extra load.
for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
++UI) {
if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
continue;
if (*UI != N)
return SDValue();
}
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD
|| UI.getUse().getResNo() != Addr.getResNo())
continue;
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = VT.getScalarSizeInBits() / 8;
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
// To avoid constructing a cycle, make sure that neither the load nor the add
// is a predecessor of the other or of the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());
if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
SmallVector<SDValue, 8> Ops;
Ops.push_back(LD->getOperand(0)); // Chain
if (IsLaneOp) {
Ops.push_back(Vector); // The vector to be inserted
Ops.push_back(Lane); // The lane to be inserted in the vector
}
Ops.push_back(Addr);
Ops.push_back(Inc);
EVT Tys[3] = { VT, MVT::i64, MVT::Other };
SDVTList SDTys = DAG.getVTList(Tys);
unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
MemVT,
LoadSDN->getMemOperand());
// Update the uses.
SDValue NewResults[] = {
SDValue(LD, 0), // The result of load
SDValue(UpdN.getNode(), 2) // Chain
};
DCI.CombineTo(LD, NewResults);
DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
break;
}
return SDValue();
}
/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
/// address translation.
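/// For example (editorial illustration): with TBI only address bits [55:0]
/// take part in translation, so a tag-clearing mask such as
/// (and addr, 0x00ffffffffffffff) feeding a load/store can be removed.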
static bool performTBISimplification(SDValue Addr,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
APInt DemandedMask = APInt::getLowBitsSet(64, 56);
KnownBits Known;
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
DCI.CommitTargetLoweringOpt(TLO);
return true;
}
return false;
}
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
"Expected STORE dag node in input!");
if (auto Store = dyn_cast<StoreSDNode>(N)) {
if (!Store->isTruncatingStore() || Store->isIndexed())
return SDValue();
SDValue Ext = Store->getValue();
auto ExtOpCode = Ext.getOpcode();
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
if (Store->getMemoryVT() != Orig.getValueType())
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
Store->getBasePtr(), Store->getMemOperand());
}
return SDValue();
}
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
// If this is an FP_ROUND followed by a store, fold this into a truncating
// store. We can do this even if this is already a truncstore.
// We purposefully don't care about legality of the nodes here as we know
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && Value.getOpcode() == ISD::FP_ROUND &&
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
Subtarget->useSVEForFixedLengthVectors() &&
Value.getValueType().isFixedLengthVector() &&
Value.getValueType().getFixedSizeInBits() >=
Subtarget->getMinSVEVectorSizeInBits())
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemoryVT(), ST->getMemOperand());
if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
if (Subtarget->supportsAddressTopByteIgnored() &&
performTBISimplification(N->getOperand(2), DCI, DAG))
return SDValue(N, 0);
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
return Store;
return SDValue();
}
static SDValue performMSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Value = MST->getValue();
SDValue Mask = MST->getMask();
SDLoc DL(N);
// If this is a UZP1 followed by a masked store, fold this into a masked
// truncating store. We can do this even if this is already a masked
// truncstore.
if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
MST->isUnindexed() && Mask->getOpcode() == AArch64ISD::PTRUE &&
Value.getValueType().isInteger()) {
Value = Value.getOperand(0);
if (Value.getOpcode() == ISD::BITCAST) {
EVT HalfVT =
Value.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
EVT InVT = Value.getOperand(0).getValueType();
if (HalfVT.widenIntegerVectorElementType(*DAG.getContext()) == InVT) {
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned PgPattern = Mask->getConstantOperandVal(0);
// Ensure we can double the size of the predicate pattern
unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
if (NumElts && NumElts * InVT.getVectorElementType().getSizeInBits() <=
MinSVESize) {
Mask = getPTrue(DAG, DL, InVT.changeVectorElementType(MVT::i1),
PgPattern);
return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
MST->getBasePtr(), MST->getOffset(), Mask,
MST->getMemoryVT(), MST->getMemOperand(),
MST->getAddressingMode(),
/*IsTruncating=*/true);
}
}
}
}
return SDValue();
}
/// \return true if part of the index was folded into the Base.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
SDLoc DL, SelectionDAG &DAG) {
// This function assumes a vector of i64 indices.
EVT IndexVT = Index.getValueType();
if (!IndexVT.isVector() || IndexVT.getVectorElementType() != MVT::i64)
return false;
// Simplify:
// BasePtr = Ptr
// Index = X + splat(Offset)
// ->
// BasePtr = Ptr + Offset * scale.
// Index = X
if (Index.getOpcode() == ISD::ADD) {
if (auto Offset = DAG.getSplatValue(Index.getOperand(1))) {
Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
Index = Index.getOperand(0);
return true;
}
}
// Simplify:
// BasePtr = Ptr
// Index = (X + splat(Offset)) << splat(Shift)
// ->
// BasePtr = Ptr + (Offset << Shift) * Scale
// Index = X << splat(Shift)
if (Index.getOpcode() == ISD::SHL &&
Index.getOperand(0).getOpcode() == ISD::ADD) {
SDValue Add = Index.getOperand(0);
SDValue ShiftOp = Index.getOperand(1);
SDValue OffsetOp = Add.getOperand(1);
if (auto Shift = DAG.getSplatValue(ShiftOp))
if (auto Offset = DAG.getSplatValue(OffsetOp)) {
Offset = DAG.getNode(ISD::SHL, DL, MVT::i64, Offset, Shift);
Offset = DAG.getNode(ISD::MUL, DL, MVT::i64, Offset, Scale);
BasePtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, Offset);
Index = DAG.getNode(ISD::SHL, DL, Index.getValueType(),
Add.getOperand(0), ShiftOp);
return true;
}
}
return false;
}
// Analyse the specified address, returning true if a more optimal addressing
// mode is available. When returning true, all parameters are updated to
// reflect their recommended values.
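// Illustrative example (editorial note): an nxv4i64 index of
// (step_vector 4) would be split during legalisation; provided the
// worst-case offset of the last element fits in i32, it can instead be
// narrowed to an nxv4i32 step_vector, which legalises more cheaply.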
static bool findMoreOptimalIndexType(const MaskedGatherScatterSDNode *N,
SDValue &BasePtr, SDValue &Index,
SelectionDAG &DAG) {
// Try to iteratively fold parts of the index into the base pointer to
// simplify the index as much as possible.
bool Changed = false;
while (foldIndexIntoBase(BasePtr, Index, N->getScale(), SDLoc(N), DAG))
Changed = true;
// Only consider element types that are pointer sized as smaller types can
// be easily promoted.
EVT IndexVT = Index.getValueType();
if (IndexVT.getVectorElementType() != MVT::i64 || IndexVT == MVT::nxv2i64)
return Changed;
// Match:
// Index = step(const)
int64_t Stride = 0;
if (Index.getOpcode() == ISD::STEP_VECTOR)
Stride = cast<ConstantSDNode>(Index.getOperand(0))->getSExtValue();
// Match:
// Index = step(const) << shift(const)
else if (Index.getOpcode() == ISD::SHL &&
Index.getOperand(0).getOpcode() == ISD::STEP_VECTOR) {
SDValue RHS = Index.getOperand(1);
if (auto *Shift =
dyn_cast_or_null<ConstantSDNode>(DAG.getSplatValue(RHS))) {
int64_t Step = (int64_t)Index.getOperand(0).getConstantOperandVal(0);
Stride = Step << Shift->getZExtValue();
}
}
// Return early if no supported pattern was found.
if (Stride == 0)
return Changed;
if (Stride < std::numeric_limits<int32_t>::min() ||
Stride > std::numeric_limits<int32_t>::max())
return Changed;
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MaxVScale =
Subtarget.getMaxSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
int64_t LastElementOffset =
IndexVT.getVectorMinNumElements() * Stride * MaxVScale;
if (LastElementOffset < std::numeric_limits<int32_t>::min() ||
LastElementOffset > std::numeric_limits<int32_t>::max())
return Changed;
EVT NewIndexVT = IndexVT.changeVectorElementType(MVT::i32);
// Stride is not multiplied by 'Scale' here because that scaling is applied
// implicitly by the gather/scatter addressing mode.
Index = DAG.getNode(ISD::STEP_VECTOR, SDLoc(N), NewIndexVT,
DAG.getTargetConstant(Stride, SDLoc(N), MVT::i32));
return true;
}
static SDValue performMaskedGatherScatterCombine(
SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) {
MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
assert(MGS && "Can only combine gather load or scatter store nodes");
if (!DCI.isBeforeLegalize())
return SDValue();
SDLoc DL(MGS);
SDValue Chain = MGS->getChain();
SDValue Scale = MGS->getScale();
SDValue Index = MGS->getIndex();
SDValue Mask = MGS->getMask();
SDValue BasePtr = MGS->getBasePtr();
ISD::MemIndexType IndexType = MGS->getIndexType();
if (!findMoreOptimalIndexType(MGS, BasePtr, Index, DAG))
return SDValue();
// A more optimal index type was found above; rebuild the gather/scatter
// with the updated BasePtr and Index, which are more legalisation friendly.
if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
SDValue PassThru = MGT->getPassThru();
SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
return DAG.getMaskedGather(
DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
}
auto *MSC = cast<MaskedScatterSDNode>(MGS);
SDValue Data = MSC->getValue();
SDValue Ops[] = {Chain, Data, Mask, BasePtr, Index, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL,
Ops, MSC->getMemOperand(), IndexType,
MSC->isTruncatingStore());
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
static SDValue performNEONPostLDSTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
unsigned AddrOpIdx = N->getNumOperands() - 1;
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
if (User->getOpcode() != ISD::ADD ||
UI.getUse().getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
// it would create a cycle.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(Addr.getNode());
Worklist.push_back(N);
Worklist.push_back(User);
if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
SDNode::hasPredecessorHelper(User, Visited, Worklist))
continue;
// Find the new opcode for the updating load/store.
bool IsStore = false;
bool IsLaneOp = false;
bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
NumVecs = 2; break;
case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
NumVecs = 3; break;
case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
NumVecs = 4; break;
case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
NumVecs = 2; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
NumVecs = 3; IsStore = true; break;
case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
NumVecs = 4; IsStore = true; break;
case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
NumVecs = 2; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
NumVecs = 3; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
NumVecs = 4; IsDupOp = true; break;
case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
NumVecs = 2; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
NumVecs = 3; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
NumVecs = 4; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
NumVecs = 2; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
NumVecs = 3; IsStore = true; IsLaneOp = true; break;
case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
NumVecs = 4; IsStore = true; IsLaneOp = true; break;
}
EVT VecTy;
if (IsStore)
VecTy = N->getOperand(2).getValueType();
else
VecTy = N->getValueType(0);
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
if (IsLaneOp || IsDupOp)
NumBytes /= VecTy.getVectorNumElements();
if (IncVal != NumBytes)
continue;
Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // Incoming chain
// Load-lane and store operations have a vector list as input.
if (IsLaneOp || IsStore)
for (unsigned i = 2; i < AddrOpIdx; ++i)
Ops.push_back(N->getOperand(i));
Ops.push_back(Addr); // Base register
Ops.push_back(Inc);
// Return Types.
EVT Tys[6];
unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
Tys[n++] = MVT::i64; // Type of write back register
Tys[n] = MVT::Other; // Type of the chain
SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
std::vector<SDValue> NewResults;
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
break;
}
return SDValue();
}
// Checks to see if the value is of the prescribed width and returns information
// about its extension mode.
static
bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
ExtType = ISD::NON_EXTLOAD;
switch(V.getNode()->getOpcode()) {
default:
return false;
case ISD::LOAD: {
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
|| (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
ExtType = LoadNode->getExtensionType();
return true;
}
return false;
}
case ISD::AssertSext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::SEXTLOAD;
return true;
}
return false;
}
case ISD::AssertZext: {
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
if ((TypeNode->getVT() == MVT::i8 && width == 8)
|| (TypeNode->getVT() == MVT::i16 && width == 16)) {
ExtType = ISD::ZEXTLOAD;
return true;
}
return false;
}
case ISD::Constant:
case ISD::TargetConstant: {
return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
1LL << (width - 1);
}
}
return true;
}
// This function does a whole lot of voodoo to determine if the tests are
// equivalent without and with a mask. Essentially what happens is that given a
// DAG resembling:
//
// +-------------+ +-------------+ +-------------+ +-------------+
// | Input | | AddConstant | | CompConstant| | CC |
// +-------------+ +-------------+ +-------------+ +-------------+
// | | | |
// V V | +----------+
// +-------------+ +----+ | |
// | ADD | |0xff| | |
// +-------------+ +----+ | |
// | | | |
// V V | |
// +-------------+ | |
// | AND | | |
// +-------------+ | |
// | | |
// +-----+ | |
// | | |
// V V V
// +-------------+
// | CMP |
// +-------------+
//
// The AND node may be safely removed for some combinations of inputs. In
// particular we need to take into account the extension type of the Input,
// the exact values of AddConstant, CompConstant, and CC, along with the nominal
// width of the input (this can work for any width of input; the above graph
// is specific to 8 bits).
//
// The specific equations were worked out by generating output tables for each
// AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
// problem was simplified by working with 4 bit inputs, which means we only
// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
// extension (8,15), 8 patterns unique to sign extension (-8,-1), and 8
// patterns present in both extensions (0,7). For every distinct pair of
// AddConstant and CompConstant bit patterns we can consider the masked and
// unmasked versions to be equivalent if the result of this function is true
// for all 16 distinct bit patterns of the current extension type of Input (w0).
//
// sub w8, w0, w1
// and w10, w8, #0x0f
// cmp w8, w2
// cset w9, AArch64CC
// cmp w10, w2
// cset w11, AArch64CC
// cmp w9, w11
// cset w0, eq
// ret
//
// Since the above function shows when the outputs are equivalent it defines
// when it is safe to remove the AND. Unfortunately it only runs on AArch64
// and would be expensive to run during compiles. The equations below were
// written in a test harness that confirmed they gave outputs equivalent to
// the above function for all inputs, so they can be used to determine if the
// removal is legal instead.
//
// isEquivalentMaskless() is the test for whether the AND can be removed,
// factored out of the DAG recognition because the DAG can take several forms.
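// Illustrative worked instance (editorial note): for width == 8 (so
// MaxUInt == 256), ExtType == ZEXTLOAD, CC == EQ, AddConstant == 0 and
// CompConstant == 17, the zero-extended input is already in [0, 255], so
// ((x + 0) & 0xff) == 17 holds exactly when (x + 0) == 17 and the AND is
// removable; this matches the EQ/NE clause below
// (AddConstant >= 0 && CompConstant >= 0 && CompConstant >= AddConstant).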
static bool isEquivalentMaskless(unsigned CC, unsigned width,
ISD::LoadExtType ExtType, int AddConstant,
int CompConstant) {
// By being careful about our equations and only writing them in terms of
// symbolic values and well known constants (0, 1, -1, MaxUInt) we can
// make them generally applicable to all bit widths.
int MaxUInt = (1 << width);
// For the purposes of these comparisons sign extending the type is
// equivalent to zero extending the add and displacing it by half the integer
// width. Provided we are careful and make sure our equations are valid over
// the whole range we can just adjust the input and avoid writing equations
// for sign extended inputs.
if (ExtType == ISD::SEXTLOAD)
AddConstant -= (1 << (width-1));
switch(CC) {
case AArch64CC::LE:
case AArch64CC::GT:
if ((AddConstant == 0) ||
(CompConstant == MaxUInt - 1 && AddConstant < 0) ||
(AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::LT:
case AArch64CC::GE:
if ((AddConstant == 0) ||
(AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::HI:
case AArch64CC::LS:
if ((AddConstant >= 0 && CompConstant < 0) ||
(AddConstant <= 0 && CompConstant >= -1 &&
CompConstant < AddConstant + MaxUInt))
return true;
break;
case AArch64CC::PL:
case AArch64CC::MI:
if ((AddConstant == 0) ||
(AddConstant > 0 && CompConstant <= 0) ||
(AddConstant < 0 && CompConstant <= AddConstant))
return true;
break;
case AArch64CC::LO:
case AArch64CC::HS:
if ((AddConstant >= 0 && CompConstant <= 0) ||
(AddConstant <= 0 && CompConstant >= 0 &&
CompConstant <= AddConstant + MaxUInt))
return true;
break;
case AArch64CC::EQ:
case AArch64CC::NE:
if ((AddConstant > 0 && CompConstant < 0) ||
(AddConstant < 0 && CompConstant >= 0 &&
CompConstant < AddConstant + MaxUInt) ||
(AddConstant >= 0 && CompConstant >= 0 &&
CompConstant >= AddConstant) ||
(AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
return true;
break;
case AArch64CC::VS:
case AArch64CC::VC:
case AArch64CC::AL:
case AArch64CC::NV:
return true;
case AArch64CC::Invalid:
break;
}
return false;
}
static
SDValue performCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG, unsigned CCIndex,
unsigned CmpIndex) {
unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
unsigned CondOpcode = SubsNode->getOpcode();
if (CondOpcode != AArch64ISD::SUBS)
return SDValue();
// There is a SUBS feeding this condition. Is it fed by a mask we can
// use?
SDNode *AndNode = SubsNode->getOperand(0).getNode();
unsigned MaskBits = 0;
if (AndNode->getOpcode() != ISD::AND)
return SDValue();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
MaskBits = 8;
else if (CNV == 65535)
MaskBits = 16;
}
if (!MaskBits)
return SDValue();
SDValue AddValue = AndNode->getOperand(0);
if (AddValue.getOpcode() != ISD::ADD)
return SDValue();
// The basic dag structure is correct, grab the inputs and validate them.
SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
SDValue SubsInputValue = SubsNode->getOperand(1);
// The mask is present and the provenance of all the values is a smaller type,
// so let's see if the mask is superfluous.
if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
!isa<ConstantSDNode>(SubsInputValue.getNode()))
return SDValue();
ISD::LoadExtType ExtType;
if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
!checkValueWidth(AddInputValue1, MaskBits, ExtType) )
return SDValue();
if (!isEquivalentMaskless(CC, MaskBits, ExtType,
cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
return SDValue();
// The AND is not necessary, remove it.
SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
SubsNode->getValueType(1));
SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
return SDValue(N, 0);
}
// Optimize compare with zero and branch.
static SDValue performBRCONDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
// will not be produced, as they are conditional branch instructions that do
// not set flags.
if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
return SDValue();
if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
N = NV.getNode();
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue CCVal = N->getOperand(2);
SDValue Cmp = N->getOperand(3);
assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return SDValue();
unsigned CmpOpc = Cmp.getOpcode();
if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
return SDValue();
// Only attempt folding if there is only one use of the flag and no use of the
// value.
if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
return SDValue();
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
assert(LHS.getValueType() == RHS.getValueType() &&
"Expected the value type to be the same for both operands!");
if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
return SDValue();
if (isNullConstant(LHS))
std::swap(LHS, RHS);
if (!isNullConstant(RHS))
return SDValue();
if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
LHS.getOpcode() == ISD::SRL)
return SDValue();
// Fold the compare into the branch instruction.
SDValue BR;
if (CC == AArch64CC::EQ)
BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
else
BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, BR, false);
return SDValue();
}
static SDValue foldCSELofCTTZ(SDNode *N, SelectionDAG &DAG) {
unsigned CC = N->getConstantOperandVal(2);
SDValue SUBS = N->getOperand(3);
SDValue Zero, CTTZ;
if (CC == AArch64CC::EQ && SUBS.getOpcode() == AArch64ISD::SUBS) {
Zero = N->getOperand(0);
CTTZ = N->getOperand(1);
} else if (CC == AArch64CC::NE && SUBS.getOpcode() == AArch64ISD::SUBS) {
Zero = N->getOperand(1);
CTTZ = N->getOperand(0);
} else
return SDValue();
if ((CTTZ.getOpcode() != ISD::CTTZ && CTTZ.getOpcode() != ISD::TRUNCATE) ||
(CTTZ.getOpcode() == ISD::TRUNCATE &&
CTTZ.getOperand(0).getOpcode() != ISD::CTTZ))
return SDValue();
assert((CTTZ.getValueType() == MVT::i32 || CTTZ.getValueType() == MVT::i64) &&
"Illegal type in CTTZ folding");
if (!isNullConstant(Zero) || !isNullConstant(SUBS.getOperand(1)))
return SDValue();
SDValue X = CTTZ.getOpcode() == ISD::TRUNCATE
? CTTZ.getOperand(0).getOperand(0)
: CTTZ.getOperand(0);
if (X != SUBS.getOperand(0))
return SDValue();
unsigned BitWidth = CTTZ.getOpcode() == ISD::TRUNCATE
? CTTZ.getOperand(0).getValueSizeInBits()
: CTTZ.getValueSizeInBits();
SDValue BitWidthMinusOne =
DAG.getConstant(BitWidth - 1, SDLoc(N), CTTZ.getValueType());
return DAG.getNode(ISD::AND, SDLoc(N), CTTZ.getValueType(), CTTZ,
BitWidthMinusOne);
}
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
// CSEL x, x, cc -> x
if (N->getOperand(0) == N->getOperand(1))
return N->getOperand(0);
// CSEL 0, cttz(X), eq(X, 0) -> AND(cttz(X), BitWidth - 1)
// CSEL cttz(X), 0, ne(X, 0) -> AND(cttz(X), BitWidth - 1)
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
return Folded;
return performCONDCombine(N, DCI, DAG, 2, 3);
}
// Try to re-use an already extended operand of a vector SetCC feeding an
// extended select. Doing so avoids requiring another full extension of the
// SET_CC result when lowering the select.
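// Illustrative example (editorial note): if (v4i32 sign_extend x) already
// exists in the DAG, a (setcc (v4i16 x), splat(C), setgt) feeding v4i32
// VSELECTs can be rebuilt as a compare of the extended values, so the i1
// mask no longer needs a separate extension when lowering the selects.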
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
EVT Op0MVT = Op->getOperand(0).getValueType();
if (!Op0MVT.isVector() || Op->use_empty())
return SDValue();
// Make sure that all uses of Op are VSELECTs with matching result types,
// where the result type has a larger element type than the SetCC operand.
SDNode *FirstUse = *Op->use_begin();
if (FirstUse->getOpcode() != ISD::VSELECT)
return SDValue();
EVT UseMVT = FirstUse->getValueType(0);
if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits())
return SDValue();
if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
}))
return SDValue();
APInt V;
if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
return SDValue();
SDLoc DL(Op);
SDValue Op0ExtV;
SDValue Op1ExtV;
ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
// Check if the first operand of the SET_CC is already extended. If it is,
// split the SET_CC and re-use the extended version of the operand.
SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT),
Op->getOperand(0));
SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT),
Op->getOperand(0));
if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
Op0ExtV = SDValue(Op0SExt, 0);
Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
} else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
Op0ExtV = SDValue(Op0ZExt, 0);
Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
} else
return SDValue();
return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
Op0ExtV, Op1ExtV, Op->getOperand(2));
}
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
SDLoc DL(N);
EVT VT = N->getValueType(0);
if (SDValue V = tryToWidenSetCCOperands(N, DAG))
return V;
// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
// Invert CSEL's condition.
auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
auto NewCond = getInvertedCondCode(OldCond);
// csel 0, 1, !cond, X
SDValue CSEL =
DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
LHS.getOperand(3));
return DAG.getZExtOrTrunc(CSEL, DL, VT);
}
// setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
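// (The shifted-out low bits cannot affect a comparison with zero, so testing
// (and x, (-1 << imm)) against zero is equivalent; editorial note.)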
if (Cond == ISD::SETNE && isNullConstant(RHS) &&
LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
LHS->hasOneUse()) {
EVT TstVT = LHS->getValueType(0);
if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
// This pattern is optimised better in emitComparison.
uint64_t TstImm = -1ULL << LHS->getConstantOperandVal(1);
SDValue TST = DAG.getNode(ISD::AND, DL, TstVT, LHS->getOperand(0),
DAG.getConstant(TstImm, DL, TstVT));
return DAG.getNode(ISD::SETCC, DL, VT, TST, RHS, N->getOperand(2));
}
}
// setcc (iN (bitcast (vNi1 X))), 0, (eq|ne)
// ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne)
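// (The bitcast packs the i1 lanes into the bits of the integer, so the
// integer is non-zero exactly when some lane is set, i.e. a VECREDUCE_OR of
// the lanes; editorial note.)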
if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
LHS->getOpcode() == ISD::BITCAST) {
EVT ToVT = LHS->getValueType(0);
EVT FromVT = LHS->getOperand(0).getValueType();
if (FromVT.isFixedLengthVector() &&
FromVT.getVectorElementType() == MVT::i1) {
LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS);
return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
}
}
return SDValue();
}
// Replace a flag-setting operator (eg ANDS) with the generic version
// (eg AND) if the flag is unused.
static SDValue performFlagSettingCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
unsigned GenericOpcode) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
// If the flag result isn't used, convert back to a generic opcode.
if (!N->hasAnyUseOfValue(1)) {
SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
DL);
}
// Combine identical generic nodes into this node, re-using the result.
if (SDNode *Generic = DCI.DAG.getNodeIfExists(
GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
DCI.CombineTo(Generic, SDValue(N, 0));
return SDValue();
}
static SDValue performSetCCPunpkCombine(SDNode *N, SelectionDAG &DAG) {
// setcc_merge_zero pred
// (sign_extend (extract_subvector (setcc_merge_zero ... pred ...))), 0, ne
// => extract_subvector (inner setcc_merge_zero)
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
if (Cond != ISD::SETNE || !isZerosVector(RHS.getNode()) ||
LHS->getOpcode() != ISD::SIGN_EXTEND)
return SDValue();
SDValue Extract = LHS->getOperand(0);
if (Extract->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
Extract->getValueType(0) != N->getValueType(0) ||
Extract->getConstantOperandVal(1) != 0)
return SDValue();
SDValue InnerSetCC = Extract->getOperand(0);
if (InnerSetCC->getOpcode() != AArch64ISD::SETCC_MERGE_ZERO)
return SDValue();
// By this point we've effectively got
// zero_inactive_lanes_and_trunc_i1(sext_i1(A)). If we can prove A's inactive
// lanes are already zero then the trunc(sext()) sequence is redundant and we
// can operate on A directly.
SDValue InnerPred = InnerSetCC.getOperand(0);
if (Pred.getOpcode() == AArch64ISD::PTRUE &&
InnerPred.getOpcode() == AArch64ISD::PTRUE &&
Pred.getConstantOperandVal(0) == InnerPred.getConstantOperandVal(0) &&
Pred->getConstantOperandVal(0) >= AArch64SVEPredPattern::vl1 &&
Pred->getConstantOperandVal(0) <= AArch64SVEPredPattern::vl256)
return Extract;
return SDValue();
}
static SDValue
performSetccMergeZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
"Unexpected opcode!");
SelectionDAG &DAG = DCI.DAG;
SDValue Pred = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
if (SDValue V = performSetCCPunpkCombine(N, DAG))
return V;
if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
LHS->getOpcode() == ISD::SIGN_EXTEND &&
LHS->getOperand(0)->getValueType(0) == N->getValueType(0)) {
// setcc_merge_zero(
// pred, extend(setcc_merge_zero(pred, ...)), != splat(0))
// => setcc_merge_zero(pred, ...)
if (LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
LHS->getOperand(0)->getOperand(0) == Pred)
return LHS->getOperand(0);
// setcc_merge_zero(
// all_active, extend(nxvNi1 ...), != splat(0))
// -> nxvNi1 ...
if (isAllActivePredicate(DAG, Pred))
return LHS->getOperand(0);
// setcc_merge_zero(
// pred, extend(nxvNi1 ...), != splat(0))
// -> nxvNi1 and(pred, ...)
if (DCI.isAfterLegalizeDAG())
// Do this after legalization to allow more folds on setcc_merge_zero
// to be recognized.
return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0),
LHS->getOperand(0), Pred);
}
return SDValue();
}
// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
// as well as whether the test should be inverted. This code is required to
// catch these cases (as opposed to standard dag combines) because
// AArch64ISD::TBZ is matched during legalization.
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
SelectionDAG &DAG) {
if (!Op->hasOneUse())
return Op;
// We don't handle undef/constant-fold cases below, as they should have
// already been taken care of (e.g. and of 0, test of undefined shifted bits,
// etc.)
// (tbz (trunc x), b) -> (tbz x, b)
// This case is just here to enable more of the below cases to be caught.
if (Op->getOpcode() == ISD::TRUNCATE &&
Bit < Op->getValueType(0).getSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
// (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
if (Op->getOpcode() == ISD::ANY_EXTEND &&
Bit < Op->getOperand(0).getValueSizeInBits()) {
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
if (Op->getNumOperands() != 2)
return Op;
auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
if (!C)
return Op;
switch (Op->getOpcode()) {
default:
return Op;
// (tbz (and x, m), b) -> (tbz x, b)
case ISD::AND:
if ((C->getZExtValue() >> Bit) & 1)
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
return Op;
// (tbz (shl x, c), b) -> (tbz x, b-c)
case ISD::SHL:
if (C->getZExtValue() <= Bit &&
(Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit - C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
case ISD::SRA:
Bit = Bit + C->getZExtValue();
if (Bit >= Op->getValueType(0).getSizeInBits())
Bit = Op->getValueType(0).getSizeInBits() - 1;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
// (tbz (srl x, c), b) -> (tbz x, b+c)
case ISD::SRL:
if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
Bit = Bit + C->getZExtValue();
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
return Op;
// (tbz (xor x, -1), b) -> (tbnz x, b)
case ISD::XOR:
if ((C->getZExtValue() >> Bit) & 1)
Invert = !Invert;
return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
}
}
// Optimize test single bit zero/non-zero and branch.
static SDValue performTBZCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
bool Invert = false;
SDValue TestSrc = N->getOperand(1);
SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
if (TestSrc == NewTestSrc)
return SDValue();
unsigned NewOpc = N->getOpcode();
if (Invert) {
if (NewOpc == AArch64ISD::TBZ)
NewOpc = AArch64ISD::TBNZ;
else {
assert(NewOpc == AArch64ISD::TBNZ);
NewOpc = AArch64ISD::TBZ;
}
}
SDLoc DL(N);
return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
}
// Swap vselect operands where doing so may allow a predicated operation to
// implement the `sel`.
//
// (vselect (setcc ( condcode) (_) (_)) (a) (op (a) (b)))
// => (vselect (setcc (!condcode) (_) (_)) (op (a) (b)) (a))
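// Illustrative example (editorial note): for a, b : nxv4f32,
// (vselect m, a, (fadd a, b)) -> (vselect !m, (fadd a, b), a), after which
// the select can be folded into a predicated FADD whose inactive lanes
// keep the value of a.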
static SDValue trySwapVSelectOperands(SDNode *N, SelectionDAG &DAG) {
auto SelectA = N->getOperand(1);
auto SelectB = N->getOperand(2);
auto NTy = N->getValueType(0);
if (!NTy.isScalableVector())
return SDValue();
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() != ISD::SETCC || !SetCC.hasOneUse())
return SDValue();
switch (SelectB.getOpcode()) {
default:
return SDValue();
case ISD::FMUL:
case ISD::FSUB:
case ISD::FADD:
break;
}
if (SelectA != SelectB.getOperand(0))
return SDValue();
ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
ISD::CondCode InverseCC =
ISD::getSetCCInverse(CC, SetCC.getOperand(0).getValueType());
auto InverseSetCC =
DAG.getSetCC(SDLoc(SetCC), SetCC.getValueType(), SetCC.getOperand(0),
SetCC.getOperand(1), InverseCC);
return DAG.getNode(ISD::VSELECT, SDLoc(N), NTy,
{InverseSetCC, SelectB, SelectA});
}
// vselect (v1i1 setcc) ->
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
// such VSELECT.
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
if (auto SwapResult = trySwapVSelectOperands(N, DAG))
return SwapResult;
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
if (isAllActivePredicate(DAG, N0))
return N->getOperand(1);
if (isAllInactivePredicate(N0))
return N->getOperand(2);
// Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
// supported types.
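// For example (editorial illustration), with v4i32 lanes
// (x > -1) ? 1 : -1 becomes ((x >> 31) | 1): the arithmetic shift yields 0
// or -1 per lane, and the OR with splat(1) turns that into 1 or -1.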
SDValue SetCC = N->getOperand(0);
if (SetCC.getOpcode() == ISD::SETCC &&
SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
SDValue CmpLHS = SetCC.getOperand(0);
EVT VT = CmpLHS.getValueType();
SDNode *CmpRHS = SetCC.getOperand(1).getNode();
SDNode *SplatLHS = N->getOperand(1).getNode();
SDNode *SplatRHS = N->getOperand(2).getNode();
APInt SplatLHSVal;
if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
VT.isSimple() &&
is_contained(
makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
VT.getScalarType()));
SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
return Or;
}
}
if (N0.getOpcode() != ISD::SETCC ||
CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
CCVT.getVectorElementType() != MVT::i1)
return SDValue();
EVT ResVT = N->getValueType(0);
EVT CmpVT = N0.getOperand(0).getValueType();
// Only combine when the result type is of the same size as the compared
// operands.
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
return SDValue();
SDValue IfTrue = N->getOperand(1);
SDValue IfFalse = N->getOperand(2);
SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
IfTrue, IfFalse);
}
/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
/// the compare-mask instructions rather than going via NZCV, even if LHS and
/// RHS are really scalar. This replaces any scalar setcc in the above pattern
/// with a vector one followed by a DUP shuffle on the result.
static SDValue performSelectCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT ResVT = N->getValueType(0);
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
if (ResVT.isScalableVector())
return SDValue();
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
// scalar SetCCResultType. We also don't expect vectors, because we assume
// that selects fed by vector SETCCs are canonicalized to VSELECT.
assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
"Scalar-SETCC feeding SELECT has unexpected result type!");
// If NumMaskElts == 0, the comparison is larger than the select result. The
// largest real NEON comparison is 64 bits per lane, which means the result is
// at most 32 bits and an illegal vector. Just bail out for now.
EVT SrcVT = N0.getOperand(0).getValueType();
// Don't try to do this optimization when the setcc itself has i1 operands.
// There are no legal vectors of i1, so this would be pointless.
if (SrcVT == MVT::i1)
return SDValue();
int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
if (!ResVT.isVector() || NumMaskElts == 0)
return SDValue();
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
// Also bail out if the vector CCVT isn't the same size as ResVT.
// This can happen if the SETCC operand size doesn't divide the ResVT size
// (e.g., f64 vs v3f32).
if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
return SDValue();
// Make sure we didn't create illegal types, if we're not supposed to.
assert(DCI.isBeforeLegalize() ||
DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
// First perform a vector comparison, where lane 0 is the one we're interested
// in.
SDLoc DL(N0);
SDValue LHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
SDValue RHS =
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
// Now duplicate the comparison mask we want across all other lanes.
SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
Mask = DAG.getNode(ISD::BITCAST, DL,
ResVT.changeVectorElementTypeToInteger(), Mask);
return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
static SDValue performDUPCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
// If "v2i32 DUP(x)" and "v4i32 DUP(x)" both exist, use an extract from the
// 128-bit vector version.
if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) {
EVT LVT = VT.getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
if (SDNode *LN = DCI.DAG.getNodeIfExists(
N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) {
SDLoc DL(N);
return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0),
DCI.DAG.getConstant(0, DL, MVT::i64));
}
}
return performPostLD1Combine(N, DCI, false);
}
/// Get rid of unnecessary NVCASTs (that don't change the type).
static SDValue performNVCASTCombine(SDNode *N) {
if (N->getValueType(0) == N->getOperand(0).getValueType())
return N->getOperand(0);
return SDValue();
}
// If all users of the globaladdr are of the form (globaladdr + constant), find
// the smallest constant, fold it into the globaladdr's offset and rewrite the
// globaladdr as (globaladdr + constant) - constant.
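// Illustrative example (editorial note): if the uses are (globaladdr + 8)
// and (globaladdr + 12), the node is rewritten as ((globaladdr + 8) - 8);
// the uses then fold to (globaladdr + 8) and ((globaladdr + 8) + 4), both
// anchored on the folded address.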
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
const TargetMachine &TM) {
auto *GN = cast<GlobalAddressSDNode>(N);
if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
AArch64II::MO_NO_FLAG)
return SDValue();
uint64_t MinOffset = -1ull;
for (SDNode *N : GN->uses()) {
if (N->getOpcode() != ISD::ADD)
return SDValue();
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
if (!C)
C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C)
return SDValue();
MinOffset = std::min(MinOffset, C->getZExtValue());
}
uint64_t Offset = MinOffset + GN->getOffset();
// Require that the new offset is larger than the existing one. Otherwise, we
// can end up oscillating between two possible DAGs, for example,
// (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
if (Offset <= uint64_t(GN->getOffset()))
return SDValue();
// Check whether folding this offset is legal. It must not go out of bounds of
// the referenced object to avoid violating the code model, and must be
// smaller than 2^20 because this is the largest offset expressible in all
// object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
// stores an immediate signed 21 bit offset.)
//
// This check also prevents us from folding negative offsets, which will end
// up being treated in the same way as large positive ones. They could also
// cause code model violations, and aren't really common enough to matter.
if (Offset >= (1 << 20))
return SDValue();
const GlobalValue *GV = GN->getGlobal();
Type *T = GV->getValueType();
if (!T->isSized() ||
Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
return SDValue();
SDLoc DL(GN);
SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
DAG.getConstant(MinOffset, DL, MVT::i64));
}
// Turns the vector of indices into a vector of byte offsets by scaling Offset
// by (BitWidth / 8).
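// (For example, with 32-bit elements each index is shifted left by 2, i.e.
// multiplied by 4, to produce a byte offset; editorial illustration.)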
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
SDLoc DL, unsigned BitWidth) {
assert(Offset.getValueType().isScalableVector() &&
"This method is only for scalable vectors of offsets");
SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
}
/// Check if the value of \p OffsetInBytes can be used as an immediate for
/// the gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
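/// For example (editorial illustration), with 4-byte elements the valid
/// immediates are 0, 4, 8, ..., 124.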
inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
unsigned ScalarSizeInBytes) {
// The immediate is not a multiple of the scalar size.
if (OffsetInBytes % ScalarSizeInBytes)
return false;
// The immediate is out of range.
if (OffsetInBytes / ScalarSizeInBytes > 31)
return false;
return true;
}
/// Check if the value of \p Offset represents a valid immediate for the SVE
/// gather load/prefetch and scatter store instructions with vector base and
/// immediate offset addressing mode:
///
/// [<Zn>.[S|D]{, #<imm>}]
///
/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
unsigned ScalarSizeInBytes) {
ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
return OffsetConst && isValidImmForSVEVecImmAddrMode(
OffsetConst->getZExtValue(), ScalarSizeInBytes);
}
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const SDValue Src = N->getOperand(2);
const EVT SrcVT = Src->getValueType(0);
assert(SrcVT.isScalableVector() &&
"Scatter stores are only possible for SVE vectors");
SDLoc DL(N);
MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
// Make sure that source data will fit into an SVE register
if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// For FPs, ACLE only supports _packed_ single and double precision types.
if (SrcElVT.isFloatingPoint())
if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(4);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(5);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal scatters because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
Offset =
getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
Opcode = AArch64ISD::SSTNT1_PRED;
}
// In the case of non-temporal scatter stores there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
std::swap(Base, Offset);
// SST1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the stored items. For
// immediates outside that range and non-immediate scalar offsets use SST1 or
// SST1_UXTW instead.
if (Opcode == AArch64ISD::SST1_IMM_PRED) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
SrcVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = AArch64ISD::SST1_UXTW_PRED;
else
Opcode = AArch64ISD::SST1_PRED;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some scatter store variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign- (sxtw) or zero-extended (uxtw) to
// nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
if (!TLI.isTypeLegal(Offset.getValueType()))
return SDValue();
// Source value type that is representable in hardware
EVT HwSrcVt = getSVEContainerType(SrcVT);
// Keep the original type of the input data to store - this is needed to be
// able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
// FP values we want the integer equivalent, so just use HwSrcVt.
SDValue InputVT = DAG.getValueType(SrcVT);
if (SrcVT.isFloatingPoint())
InputVT = DAG.getValueType(HwSrcVt);
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue SrcNew;
if (Src.getValueType().isFloatingPoint())
SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
else
SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
SDValue Ops[] = {N->getOperand(0), // Chain
SrcNew,
N->getOperand(3), // Pg
Base,
Offset,
InputVT};
return DAG.getNode(Opcode, DL, VTs, Ops);
}
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode,
bool OnlyPackedOffsets = true) {
const EVT RetVT = N->getValueType(0);
assert(RetVT.isScalableVector() &&
"Gather loads are only possible for SVE vectors");
SDLoc DL(N);
// Make sure that the loaded data will fit into an SVE register
if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
return SDValue();
// Depending on the addressing mode, this is either a pointer or a vector of
// pointers (that fits into one register)
SDValue Base = N->getOperand(3);
// Depending on the addressing mode, this is either a single offset or a
// vector of offsets (that fits into one register)
SDValue Offset = N->getOperand(4);
// For "scalar + vector of indices", just scale the indices. This only
// applies to non-temporal gathers because there's no instruction that takes
// indices.
if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
RetVT.getScalarSizeInBits());
Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
}
// In the case of non-temporal gather loads there's only one SVE instruction
// per data-size: "scalar + vector", i.e.
// * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
// Since we do have intrinsics that allow the arguments to be in a different
// order, we may need to swap them to match the spec.
if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
Offset.getValueType().isVector())
std::swap(Base, Offset);
// GLD{FF}1_IMM requires that the offset is an immediate that is:
// * a multiple of #SizeInBytes,
// * in the range [0, 31 x #SizeInBytes],
// where #SizeInBytes is the size in bytes of the loaded items. For
// immediates outside that range and non-immediate scalar offsets use
// GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
if (!isValidImmForSVEVecImmAddrMode(Offset,
RetVT.getScalarSizeInBits() / 8)) {
if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_UXTW_MERGE_ZERO
: AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
else
Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
? AArch64ISD::GLD1_MERGE_ZERO
: AArch64ISD::GLDFF1_MERGE_ZERO;
std::swap(Base, Offset);
}
}
auto &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(Base.getValueType()))
return SDValue();
// Some gather load variants allow unpacked offsets, but only as nxv2i32
// vectors. These are implicitly sign-extended (sxtw) or zero-extended (uxtw)
// to nxv2i64. Legalize accordingly.
if (!OnlyPackedOffsets &&
Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
// Return value type that is representable in hardware
EVT HwRetVt = getSVEContainerType(RetVT);
// Keep the original output value type around - this is needed to be able to
// select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
// values we want the integer equivalent, so just use HwRetVt.
SDValue OutVT = DAG.getValueType(RetVT);
if (RetVT.isFloatingPoint())
OutVT = DAG.getValueType(HwRetVt);
SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
SDValue Ops[] = {N->getOperand(0), // Chain
N->getOperand(2), // Pg
Base, Offset, OutVT};
SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
SDValue LoadChain = SDValue(Load.getNode(), 1);
if (RetVT.isInteger() && (RetVT != HwRetVt))
Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
// If the original return value was FP, bitcast accordingly. Doing it here
// means that we can avoid adding TableGen patterns for FPs.
if (RetVT.isFloatingPoint())
Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
return DAG.getMergeValues({Load, LoadChain}, DL);
}
static SDValue
performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Src = N->getOperand(0);
unsigned Opc = Src->getOpcode();
// Sign extend of an unsigned unpack -> signed unpack
if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
: AArch64ISD::SUNPKLO;
// Push the sign extend to the operand of the unpack
// This is necessary where, for example, the operand of the unpack
// is another unpack:
// 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
// ->
// 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8))
// ->
// 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
SDValue ExtOp = Src->getOperand(0);
auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT EltTy = VT.getVectorElementType();
(void)EltTy;
assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
"Sign extending from an invalid type");
EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
ExtOp, DAG.getValueType(ExtVT));
return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
}
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (!EnableCombineMGatherIntrinsics)
return SDValue();
// SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
// for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
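// For example (illustrative): (sign_extend_inreg (GLD1_MERGE_ZERO ...), nxv4i8)
// where the gather loaded i8 elements becomes a single GLD1S_MERGE_ZERO node
// that sign-extends as part of the load, provided the load has no other users.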
unsigned NewOpc;
unsigned MemVTOpNum = 4;
switch (Opc) {
case AArch64ISD::LD1_MERGE_ZERO:
NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDNF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::LDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
MemVTOpNum = 3;
break;
case AArch64ISD::GLD1_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
break;
case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
break;
case AArch64ISD::GLDNT1_MERGE_ZERO:
NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
break;
default:
return SDValue();
}
EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
return SDValue();
EVT DstVT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
SmallVector<SDValue, 5> Ops;
for (unsigned I = 0; I < Src->getNumOperands(); ++I)
Ops.push_back(Src->getOperand(I));
SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
// Return N so it doesn't get rechecked
return SDValue(N, 0);
}
/// Legalize the gather prefetch (scalar + vector addressing mode) when the
/// offset vector is an unpacked 32-bit scalable vector (nxv2i32). Offsets of
/// any other type do not need legalization.
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
const unsigned OffsetPos = 4;
SDValue Offset = N->getOperand(OffsetPos);
// Not an unpacked vector, bail out.
if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
return SDValue();
// Extend the unpacked offset vector to 64-bit lanes.
SDLoc DL(N);
Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
// Replace the offset operand with the 64-bit one.
Ops[OffsetPos] = Offset;
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
/// Combines a node carrying the intrinsic
/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
/// SVE gather prefetch instruction with vector-plus-immediate addressing mode.
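/// For example (illustrative): a prfh with a scalar offset of 33 is not a
/// multiple of 2, so the base and offset are swapped and the node is remapped
/// to the uxtw-index form.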
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
unsigned ScalarSizeInBytes) {
const unsigned ImmPos = 4, OffsetPos = 3;
// No need to combine the node if the immediate is valid...
if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
return SDValue();
// ...otherwise swap the offset base with the offset...
SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
std::swap(Ops[ImmPos], Ops[OffsetPos]);
// ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
// `aarch64_sve_prfb_gather_uxtw_index`.
SDLoc DL(N);
Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
MVT::i64);
return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
}
// Return true if the vector operation can guarantee that only the first lane
// of its result contains data, with all bits in other lanes set to zero.
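// The SVE reduction nodes below, for instance, produce their scalar result in
// lane 0 and leave the remaining lanes of the result vector zeroed.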
static bool isLanes1toNKnownZero(SDValue Op) {
switch (Op.getOpcode()) {
default:
return false;
case AArch64ISD::ANDV_PRED:
case AArch64ISD::EORV_PRED:
case AArch64ISD::FADDA_PRED:
case AArch64ISD::FADDV_PRED:
case AArch64ISD::FMAXNMV_PRED:
case AArch64ISD::FMAXV_PRED:
case AArch64ISD::FMINNMV_PRED:
case AArch64ISD::FMINV_PRED:
case AArch64ISD::ORV_PRED:
case AArch64ISD::SADDV_PRED:
case AArch64ISD::SMAXV_PRED:
case AArch64ISD::SMINV_PRED:
case AArch64ISD::UADDV_PRED:
case AArch64ISD::UMAXV_PRED:
case AArch64ISD::UMINV_PRED:
return true;
}
}
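// Looks through (insert_vector_elt (zero vector), (extract_vector_elt X, 0), 0)
// and returns X directly when lanes 1-N of X are already known to be zero,
// e.g. when X is one of the reductions recognised above.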
static SDValue removeRedundantInsertVectorElt(SDNode *N) {
assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
SDValue InsertVec = N->getOperand(0);
SDValue InsertElt = N->getOperand(1);
SDValue InsertIdx = N->getOperand(2);
// We only care about inserts into the first element...
if (!isNullConstant(InsertIdx))
return SDValue();
// ...of a zero'd vector...
if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
return SDValue();
// ...where the inserted data was previously extracted...
if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue ExtractVec = InsertElt.getOperand(0);
SDValue ExtractIdx = InsertElt.getOperand(1);
// ...from the first element of a vector.
if (!isNullConstant(ExtractIdx))
return SDValue();
// If we get here we are effectively trying to zero lanes 1-N of a vector.
// Ensure there's no type conversion going on.
if (N->getValueType(0) != ExtractVec.getValueType())
return SDValue();
if (!isLanes1toNKnownZero(ExtractVec))
return SDValue();
// The explicit zeroing is redundant.
return ExtractVec;
}
static SDValue
performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
if (SDValue Res = removeRedundantInsertVectorElt(N))
return Res;
return performPostLD1Combine(N, DCI, true);
}
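// Lower a floating-point VECTOR_SPLICE by bitcasting to integers, performing
// the splice on the packed integer container type, and bitcasting back, so
// that only integer splice patterns are needed.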
static SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
EVT Ty = N->getValueType(0);
if (Ty.isInteger())
return SDValue();
EVT IntTy = Ty.changeVectorElementTypeToInteger();
EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
IntTy.getVectorElementType().getScalarSizeInBits())
return SDValue();
SDLoc DL(N);
SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
DL, ExtIntTy);
SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
DL, ExtIntTy);
SDValue Idx = N->getOperand(2);
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
return DAG.getBitcast(Ty, Trunc);
}
static SDValue performFPExtendCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// If this fp_extend is only used by an fp_round, don't fold it here; let the
// fp_round(fp_extend) pair be folded instead.
if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND)
return SDValue();
// fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
// We purposefully don't care about legality of the nodes here as we know
// they can be split down into something legal.
if (DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(N0.getNode()) &&
N0.hasOneUse() && Subtarget->useSVEForFixedLengthVectors() &&
VT.isFixedLengthVector() &&
VT.getFixedSizeInBits() >= Subtarget->getMinSVEVectorSizeInBits()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
LN0->getChain(), LN0->getBasePtr(),
N0.getValueType(), LN0->getMemOperand());
DCI.CombineTo(N, ExtLoad);
DCI.CombineTo(N0.getNode(),
DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(),
ExtLoad, DAG.getIntPtrConstant(1, SDLoc(N0))),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
return SDValue();
}
static SDValue performBSPExpandForSVE(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *Subtarget,
bool fixedSVEVectorVT) {
EVT VT = N->getValueType(0);
// Don't expand for SVE2
if (!VT.isScalableVector() || Subtarget->hasSVE2() || Subtarget->hasSME())
return SDValue();
// Don't expand for NEON
if (VT.isFixedLengthVector() && !fixedSVEVectorVT)
return SDValue();
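// Expand the bit-select as (Mask & In1) | (~Mask & In2).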
SDLoc DL(N);
SDValue Mask = N->getOperand(0);
SDValue In1 = N->getOperand(1);
SDValue In2 = N->getOperand(2);
SDValue InvMask = DAG.getNOT(DL, Mask, VT);
SDValue Sel = DAG.getNode(ISD::AND, DL, VT, Mask, In1);
SDValue SelInv = DAG.getNode(ISD::AND, DL, VT, InvMask, In2);
return DAG.getNode(ISD::OR, DL, VT, Sel, SelInv);
}
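// Fold duplane128(insert_subvector(undef, bitcast(X), Idx), Idx) so that the
// insert and the dup are performed in X's own element type, leaving a single
// bitcast of the final result.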
static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Insert = N->getOperand(0);
if (Insert.getOpcode() != ISD::INSERT_SUBVECTOR)
return SDValue();
if (!Insert.getOperand(0).isUndef())
return SDValue();
uint64_t IdxInsert = Insert.getConstantOperandVal(2);
uint64_t IdxDupLane = N->getConstantOperandVal(1);
if (IdxInsert != IdxDupLane)
return SDValue();
SDValue Bitcast = Insert.getOperand(1);
if (Bitcast.getOpcode() != ISD::BITCAST)
return SDValue();
SDValue Subvec = Bitcast.getOperand(0);
EVT SubvecVT = Subvec.getValueType();
if (!SubvecVT.is128BitVector())
return SDValue();
EVT NewSubvecVT =
getPackedSVEVectorVT(Subvec.getValueType().getVectorElementType());
SDLoc DL(N);
SDValue NewInsert =
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewSubvecVT,
DAG.getUNDEF(NewSubvecVT), Subvec, Insert->getOperand(2));
SDValue NewDuplane128 = DAG.getNode(AArch64ISD::DUPLANE128, DL, NewSubvecVT,
NewInsert, N->getOperand(1));
return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
}
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);
case AArch64ISD::ANDS:
return performFlagSettingCombine(N, DCI, ISD::AND);
case AArch64ISD::ADC:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
return foldADCToCINC(N, DAG);
case AArch64ISD::SBC:
return foldOverflowCheck(N, DAG, /* IsAdd */ false);
case AArch64ISD::ADCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
case AArch64ISD::SBCS:
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
return R;
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
case ISD::XOR:
return performXorCombine(N, DAG, DCI, Subtarget);
case ISD::MUL:
return performMulCombine(N, DAG, DCI, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::AND:
return performANDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
return performExtendCombine(N, DCI, DAG);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
case ISD::EXTRACT_SUBVECTOR:
return performExtractSubvectorCombine(N, DCI, DAG);
case ISD::INSERT_SUBVECTOR:
return performInsertSubvectorCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
return performVSelectCombine(N, DCI.DAG);
case ISD::SETCC:
return performSETCCCombine(N, DCI, DAG);
case ISD::LOAD:
if (performTBISimplification(N->getOperand(1), DCI, DAG))
return SDValue(N, 0);
break;
case ISD::STORE:
return performSTORECombine(N, DCI, DAG, Subtarget);
case ISD::MSTORE:
return performMSTORECombine(N, DCI, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER:
return performMaskedGatherScatterCombine(N, DCI, DAG);
case ISD::VECTOR_SPLICE:
return performSVESpliceCombine(N, DAG);
case ISD::FP_EXTEND:
return performFPExtendCombine(N, DAG, DCI, Subtarget);
case AArch64ISD::BRCOND:
return performBRCONDCombine(N, DCI, DAG);
case AArch64ISD::TBNZ:
case AArch64ISD::TBZ:
return performTBZCombine(N, DCI, DAG);
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
return performDUPCombine(N, DCI);
case AArch64ISD::DUPLANE128:
return performDupLane128Combine(N, DAG);
case AArch64ISD::NVCAST:
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
case AArch64ISD::UUNPKLO:
case AArch64ISD::UUNPKHI:
return performUnpackCombine(N, DAG, Subtarget);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
return performSetccMergeZeroCombine(N, DCI);
case AArch64ISD::GLD1_MERGE_ZERO:
case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1_IMM_MERGE_ZERO:
case AArch64ISD::GLD1S_MERGE_ZERO:
case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
return performGLD1Combine(N, DAG);
case AArch64ISD::VASHR:
case AArch64ISD::VLSHR:
return performVectorShiftCombine(N, *this, DCI);
case AArch64ISD::SUNPKLO:
return performSunpkloCombine(N, DAG);
case AArch64ISD::BSP:
return performBSPExpandForSVE(
N, DAG, Subtarget, useSVEForFixedLengthVectorVT(N->getValueType(0)));
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case AArch64ISD::UADDV:
return performUADDVCombine(N, DAG);
case AArch64ISD::SMULL:
case AArch64ISD::UMULL:
return tryCombineLongOpWithDup(Intrinsic::not_intrinsic, N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
return legalizeSVEGatherPrefetchOffsVec(N, DAG);
case Intrinsic::aarch64_neon_ld2:
case Intrinsic::aarch64_neon_ld3:
case Intrinsic::aarch64_neon_ld4:
case Intrinsic::aarch64_neon_ld1x2:
case Intrinsic::aarch64_neon_ld1x3:
case Intrinsic::aarch64_neon_ld1x4:
case Intrinsic::aarch64_neon_ld2lane:
case Intrinsic::aarch64_neon_ld3lane:
case Intrinsic::aarch64_neon_ld4lane:
case Intrinsic::aarch64_neon_ld2r:
case Intrinsic::aarch64_neon_ld3r:
case Intrinsic::aarch64_neon_ld4r:
case Intrinsic::aarch64_neon_st2:
case Intrinsic::aarch64_neon_st3:
case Intrinsic::aarch64_neon_st4:
case Intrinsic::aarch64_neon_st1x2:
case Intrinsic::aarch64_neon_st1x3:
case Intrinsic::aarch64_neon_st1x4:
case Intrinsic::aarch64_neon_st2lane:
case Intrinsic::aarch64_neon_st3lane:
case Intrinsic::aarch64_neon_st4lane:
return performNEONPostLDSTCombine(N, DCI, DAG);
case Intrinsic::aarch64_sve_ldnt1:
return performLDNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_ld1rq:
return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ld1ro:
return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1:
return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldnf1:
return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1:
return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1:
return performST1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1:
return performSTNT1Combine(N, DAG);
case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
case Intrinsic::aarch64_sve_stnt1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
case Intrinsic::aarch64_sve_ld1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ld1_gather_sxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather:
return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
return performGatherLoadCombine(N, DAG,
AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
case Intrinsic::aarch64_sve_st1_scatter:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
case Intrinsic::aarch64_sve_st1_scatter_index:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
case Intrinsic::aarch64_sve_st1_scatter_sxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_SXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
return performScatterStoreCombine(N, DAG,
AArch64ISD::SST1_UXTW_SCALED_PRED,
/*OnlyPackedOffsets=*/false);
case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
case Intrinsic::aarch64_sve_tuple_get: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Src1 = N->getOperand(2);
SDValue Idx = N->getOperand(3);
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
EVT ResVT = N->getValueType(0);
uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
SDValue Val =
DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
return DAG.getMergeValues({Val, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_set: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Tuple = N->getOperand(2);
SDValue Idx = N->getOperand(3);
SDValue Vec = N->getOperand(4);
EVT TupleVT = Tuple.getValueType();
uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
uint64_t NumLanes =
Vec.getValueType().getVectorElementCount().getKnownMinValue();
if ((TupleLanes % NumLanes) != 0)
report_fatal_error("invalid tuple vector!");
uint64_t NumVecs = TupleLanes / NumLanes;
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 0; I < NumVecs; ++I) {
if (I == IdxConst)
Opnds.push_back(Vec);
else {
SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
Vec.getValueType(), Tuple, ExtIdx));
}
}
SDValue Concat =
DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_tuple_create2:
case Intrinsic::aarch64_sve_tuple_create3:
case Intrinsic::aarch64_sve_tuple_create4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SmallVector<SDValue, 4> Opnds;
for (unsigned I = 2; I < N->getNumOperands(); ++I)
Opnds.push_back(N->getOperand(I));
EVT VT = Opnds[0].getValueType();
EVT EltVT = VT.getVectorElementType();
EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VT.getVectorElementCount() *
(N->getNumOperands() - 2));
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
return DAG.getMergeValues({Concat, Chain}, DL);
}
case Intrinsic::aarch64_sve_ld2:
case Intrinsic::aarch64_sve_ld3:
case Intrinsic::aarch64_sve_ld4: {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Mask = N->getOperand(2);
SDValue BasePtr = N->getOperand(3);
SDValue LoadOps[] = {Chain, Mask, BasePtr};
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
SDValue Result =
LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
return DAG.getMergeValues({Result, Chain}, DL);
}
case Intrinsic::aarch64_rndr:
case Intrinsic::aarch64_rndrrs: {
unsigned IntrinsicID =
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
auto Register =
(IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
: AArch64SysReg::RNDRRS);
SDLoc DL(N);
SDValue A = DAG.getNode(
AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
SDValue B = DAG.getNode(
AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
return DAG.getMergeValues(
{A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
}
default:
break;
}
break;
case ISD::GlobalAddress:
return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
}
return SDValue();
}
// Check if the return value is used as only a return value, as otherwise
// we can't perform a tail-call. In particular, we need to check for
// target ISD nodes that are returns and any other "odd" constructs
// that the generic analysis code won't necessarily catch.
bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
MVT::Glue)
return false;
TCChain = Copy->getOperand(0);
} else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
if (Node->getOpcode() != AArch64ISD::RET_FLAG)
return false;
HasRet = true;
}
if (!HasRet)
return false;
Chain = TCChain;
return true;
}
// Return whether an instruction can potentially be optimized to a tail
// call. This will cause the optimizers to attempt to move, or duplicate,
// return instructions to help enable tail call optimizations for this
// instruction.
bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
bool &IsInc,
SelectionDAG &DAG) const {
if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
return false;
Base = Op->getOperand(0);
// All of the indexed addressing mode instructions take a signed 9-bit
// immediate offset, i.e. in the range [-256, 255].
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
int64_t RHSC = RHS->getSExtValue();
if (Op->getOpcode() == ISD::SUB)
RHSC = -(uint64_t)RHSC;
if (!isInt<9>(RHSC))
return false;
IsInc = (Op->getOpcode() == ISD::ADD);
Offset = Op->getOperand(1);
return true;
}
return false;
}
bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
return false;
AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
bool AArch64TargetLowering::getPostIndexedAddressParts(
SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
} else
return false;
bool IsInc;
if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
return false;
// Post-indexing updates the base, so it's not a valid transform
// if that's not the same as the load's pointer.
if (Ptr != Base)
return false;
AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
// Bitcasting between unpacked vector types of different element counts is
// not a NOP because the live elements are laid out differently.
// 01234567
// e.g. nxv2i32 = XX??XX??
// nxv4f16 = X?X?X?X?
if (VT.getVectorElementCount() != SrcVT.getVectorElementCount())
return;
SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
return;
}
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
return;
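// Bitcast the f16/bf16 via f32: insert the value as the 'hsub' subregister of
// a wider register, bitcast that to i32, then truncate to i16.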
Op = SDValue(
DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
DAG.getUNDEF(MVT::i32), Op,
DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
0);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
static void ReplaceAddWithADDP(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
if (!VT.is256BitVector() ||
(VT.getScalarType().isFloatingPoint() &&
!N->getFlags().hasAllowReassociation()) ||
(VT.getScalarType() == MVT::f16 && !Subtarget->hasFullFP16()))
return;
SDValue X = N->getOperand(0);
auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(1));
if (!Shuf) {
Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
X = N->getOperand(1);
if (!Shuf)
return;
}
if (Shuf->getOperand(0) != X || !Shuf->getOperand(1)->isUndef())
return;
// Check the mask is 1,0,3,2,5,4,...
ArrayRef<int> Mask = Shuf->getMask();
for (int I = 0, E = Mask.size(); I < E; I++)
if (Mask[I] != (I % 2 == 0 ? I + 1 : I - 1))
return;
SDLoc DL(N);
auto LoHi = DAG.SplitVector(X, DL);
assert(LoHi.first.getValueType() == LoHi.second.getValueType());
SDValue Addp = DAG.getNode(AArch64ISD::ADDP, N, LoHi.first.getValueType(),
LoHi.first, LoHi.second);
// Shuffle the elements back into order.
SmallVector<int> NMask;
for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I < E; I++) {
NMask.push_back(I);
NMask.push_back(I);
}
Results.push_back(
DAG.getVectorShuffle(VT, DL,
DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Addp,
DAG.getUNDEF(LoHi.first.getValueType())),
DAG.getUNDEF(VT), NMask));
}
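// Split the operand vector in half, combine the halves elementwise with
// InterOp (e.g. ISD::ADD), then reduce the result with the across-vector
// operation AcrossOp (e.g. AArch64ISD::SADDV).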
static void ReplaceReductionResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned InterOp,
unsigned AcrossOp) {
EVT LoVT, HiVT;
SDValue Lo, Hi;
SDLoc dl(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
Results.push_back(SplitVal);
}
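// Split an i128 value into its low and high i64 halves.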
static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
DAG.getNode(ISD::SRL, DL, MVT::i128, N,
DAG.getConstant(64, DL, MVT::i64)));
return std::make_pair(Lo, Hi);
}
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
// Common code will handle these just fine.
if (!InVT.isScalableVector() || !InVT.isInteger())
return;
SDLoc DL(N);
EVT VT = N->getValueType(0);
// The following checks bail if this is not a halving operation.
ElementCount ResEC = VT.getVectorElementCount();
if (InVT.getVectorElementCount() != (ResEC * 2))
return;
auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!CIndex)
return;
unsigned Index = CIndex->getZExtValue();
if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
return;
unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
}
// Create an even/odd pair of X registers holding integer value V.
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
SDLoc dl(V.getNode());
SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
SDValue VHi = DAG.getAnyExtOrTrunc(
DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
dl, MVT::i64);
if (DAG.getDataLayout().isBigEndian())
std::swap (VLo, VHi);
SDValue RegClass =
DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
static void ReplaceCMP_SWAP_128Results(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
assert(N->getValueType(0) == MVT::i128 &&
"AtomicCmpSwap on types less than 128 should be legal");
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
// LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
// so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
SDValue Ops[] = {
createGPRPairNode(DAG, N->getOperand(2)), // Compare value
createGPRPairNode(DAG, N->getOperand(3)), // Store value
N->getOperand(1), // Ptr
N->getOperand(0), // Chain in
};
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CASPX;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CASPAX;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CASPLX;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CASPALX;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
MachineSDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
DAG.setNodeMemRefs(CmpSwap, {MemOp});
unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
if (DAG.getDataLayout().isBigEndian())
std::swap(SubReg1, SubReg2);
SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
SDValue(CmpSwap, 0));
Results.push_back(
DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
Results.push_back(SDValue(CmpSwap, 1)); // Chain out
return;
}
unsigned Opcode;
switch (MemOp->getMergedOrdering()) {
case AtomicOrdering::Monotonic:
Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
break;
case AtomicOrdering::Acquire:
Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
break;
case AtomicOrdering::Release:
Opcode = AArch64::CMP_SWAP_128_RELEASE;
break;
case AtomicOrdering::AcquireRelease:
case AtomicOrdering::SequentiallyConsistent:
Opcode = AArch64::CMP_SWAP_128;
break;
default:
llvm_unreachable("Unexpected ordering!");
}
auto Desired = splitInt128(N->getOperand(2), DAG);
auto New = splitInt128(N->getOperand(3), DAG);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
New.first, New.second, N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this");
case ISD::BITCAST:
ReplaceBITCASTResults(N, Results, DAG);
return;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
return;
case ISD::ADD:
case ISD::FADD:
ReplaceAddWithADDP(N, Results, DAG, Subtarget);
return;
case ISD::CTPOP:
case ISD::PARITY:
if (SDValue Result = LowerCTPOP_PARITY(SDValue(N, 0), DAG))
Results.push_back(Result);
return;
case AArch64ISD::SADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
return;
case AArch64ISD::UADDV:
ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
return;
case AArch64ISD::SMINV:
ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
return;
case AArch64ISD::UMINV:
ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
return;
case AArch64ISD::SMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
return;
case AArch64ISD::UMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
case ISD::STRICT_FP_TO_UINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
MemSDNode *LoadNode = cast<MemSDNode>(N);
if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
LoadNode->getMemoryVT() != MVT::i128) {
// Non-volatile, non-atomic loads are optimized later in AArch64's load/store
// optimizer.
return;
}
SDValue Result = DAG.getMemIntrinsicNode(
AArch64ISD::LDP, SDLoc(N),
DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
LoadNode->getMemOperand());
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
Result.getValue(0), Result.getValue(1));
Results.append({Pair, Result.getValue(2) /* Chain */});
return;
}
case ISD::EXTRACT_SUBVECTOR:
ReplaceExtractSubVectorResults(N, Results, DAG);
return;
case ISD::INSERT_SUBVECTOR:
case ISD::CONCAT_VECTORS:
// Custom lowering has been requested for INSERT_SUBVECTOR and
// CONCAT_VECTORS -- but delegate to common code for result type
// legalisation
return;
case ISD::INTRINSIC_WO_CHAIN: {
EVT VT = N->getValueType(0);
assert((VT == MVT::i8 || VT == MVT::i16) &&
"custom lowering for unexpected type");
ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
switch (IntID) {
default:
return;
case Intrinsic::aarch64_sve_clasta_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_clastb_n: {
SDLoc DL(N);
auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
N->getOperand(1), Op2, N->getOperand(3));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lasta: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
case Intrinsic::aarch64_sve_lastb: {
SDLoc DL(N);
auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
N->getOperand(1), N->getOperand(2));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
}
}
}
}
bool AArch64TargetLowering::useLoadStackGuardNode() const {
if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
return TargetLowering::useLoadStackGuardNode();
return true;
}
unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal if there are three or more FDIVs.
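// For example (illustrative): x/d, y/d, z/d becomes r = 1.0/d; x*r; y*r; z*r,
// trading three divisions for one division and three multiplications.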
return 3;
}
TargetLoweringBase::LegalizeTypeAction
AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
// During type legalization, we prefer to widen v1i8, v1i16, v1i32 (and
// v1f32) to v8i8, v4i16, v2i32 (and v2f32) instead of promoting.
if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
VT == MVT::v1f32)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
// provided the address is 16-byte aligned.
bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
if (!Subtarget->hasLSE2())
return false;
if (auto LI = dyn_cast<LoadInst>(I))
return LI->getType()->getPrimitiveSizeInBits() == 128 &&
LI->getAlign() >= Align(16);
if (auto SI = dyn_cast<StoreInst>(I))
return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
SI->getAlign() >= Align(16);
return false;
}
bool AArch64TargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
return isOpSuitableForLDPSTP(I);
}
// Loads and stores smaller than 128 bits are already atomic; larger ones
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLoweringBase::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
if (Size != 128 || isOpSuitableForLDPSTP(SI))
return AtomicExpansionKind::None;
return AtomicExpansionKind::Expand;
}
// Loads and stores smaller than 128 bits are already atomic; larger ones
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
if (Size != 128 || isOpSuitableForLDPSTP(LI))
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
// For the real atomic operations, we have ldxr/stxr up to 128 bits.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->isFloatingPointOperation())
return AtomicExpansionKind::CmpXChg;
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
if (Subtarget->outlineAtomics()) {
// [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
// Don't outline them unless
// (1) high level <atomic> support approved:
// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
// (2) low level libgcc and compiler-rt support implemented by:
// min/max outline atomics helpers
if (AI->getOperation() != AtomicRMWInst::Min &&
AI->getOperation() != AtomicRMWInst::Max &&
AI->getOperation() != AtomicRMWInst::UMin &&
AI->getOperation() != AtomicRMWInst::UMax) {
return AtomicExpansionKind::None;
}
}
}
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;
return AtomicExpansionKind::LLSC;
}
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
// If subtarget has LSE, leave cmpxchg intact for codegen.
if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
// implement cmpxchg without spilling. If the address being exchanged is also
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::None;
// 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
// it.
unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
if (Size > 64)
return AtomicExpansionKind::None;
return AtomicExpansionKind::LLSC;
}
Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsAcquire = isAcquireOrStronger(Ord);
// Since i128 isn't legal and intrinsics don't get type-lowered, the ldxp
// intrinsic must return {i64, i64} and we have to recombine them into a
// single i128 here.
if (ValueTy->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
Function *Ldxr = Intrinsic::getDeclaration(M, Int);
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
return Builder.CreateOr(
Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
}
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int =
IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
CallInst *CI = Builder.CreateCall(Ldxr, Addr);
CI->addParamAttr(
0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
return Builder.CreateBitCast(Trunc, ValueTy);
}
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
}
Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
Value *Val, Value *Addr,
AtomicOrdering Ord) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
bool IsRelease = isReleaseOrStronger(Ord);
// Since the intrinsics must have legal type, the i128 intrinsics take two
// parameters: "i64, i64". We must marshal Val into the appropriate form
// before the call.
if (Val->getType()->getPrimitiveSizeInBits() == 128) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
Function *Stxr = Intrinsic::getDeclaration(M, Int);
Type *Int64Ty = Type::getInt64Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
}
Intrinsic::ID Int =
IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
Type *Tys[] = { Addr->getType() };
Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
const DataLayout &DL = M->getDataLayout();
IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
Val = Builder.CreateBitCast(Val, IntValTy);
CallInst *CI = Builder.CreateCall(
Stxr, {Builder.CreateZExtOrBitCast(
Val, Stxr->getFunctionType()->getParamType(0)),
Addr});
CI->addParamAttr(1, Attribute::get(Builder.getContext(),
Attribute::ElementType, Val->getType()));
return CI;
}
bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const {
if (!Ty->isArrayTy()) {
const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
return TySize.isScalable() && TySize.getKnownMinSize() > 128;
}
// All non-aggregate members of the type must have the same type.
SmallVector<EVT> ValueVTs;
ComputeValueVTs(*this, DL, Ty, ValueVTs);
return is_splat(ValueVTs);
}
bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
EVT) const {
return false;
}
static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
return IRB.CreatePointerCast(
IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
Offset),
IRB.getInt8PtrTy()->getPointerTo(0));
}
Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the stack cookie. See the definition
// of TLS_SLOT_STACK_GUARD in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x28);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x10);
return TargetLowering::getIRStackGuard(IRB);
}
void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
// MSVC CRT provides functionalities for stack protection.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
// MSVC CRT has a global variable holding security cookie.
M.getOrInsertGlobal("__security_cookie",
Type::getInt8PtrTy(M.getContext()));
// MSVC CRT has a function to validate security cookie.
FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
TargetLowering::insertSSPDeclarations(M);
}
Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
// MSVC CRT has a global variable holding security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getGlobalVariable("__security_cookie");
return TargetLowering::getSDagStackGuard(M);
}
Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
// MSVC CRT has a function to validate security cookie.
if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
return M.getFunction("__security_check_cookie");
return TargetLowering::getSSPStackGuardCheck(M);
}
Value *
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
// Android provides a fixed TLS slot for the SafeStack pointer. See the
// definition of TLS_SLOT_SAFESTACK in
// https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
if (Subtarget->isTargetAndroid())
return UseTlsOffset(IRB, 0x48);
// Fuchsia is similar.
// <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
if (Subtarget->isTargetFuchsia())
return UseTlsOffset(IRB, -0x8);
return TargetLowering::getSafeStackPointerLocation(IRB);
}
bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
const Instruction &AndI) const {
// Only sink 'and' mask to cmp use block if it is masking a single bit, since
// this likely allows the and/cmp/br to fold into a single tbz instruction. It
// may be beneficial to sink in other cases, but we would have to check that
// the cmp would not get folded into the br to form a cbz for these to be
// beneficial.
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
return false;
return Mask->getValue().isPowerOf2();
}
bool AArch64TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const {
// Does baseline recommend not to perform the fold by default?
if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
return false;
// Else, if this is a vector shift, prefer 'shl'.
return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
}
bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
SDNode *N) const {
if (DAG.getMachineFunction().getFunction().hasMinSize() &&
!Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
return false;
return true;
}
void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
// Update IsSplitCSR in AArch64FunctionInfo.
AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
AFI->setIsSplitCSR(true);
}
void AArch64TargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AArch64::GPR64RegClass.contains(*I))
RC = &AArch64::GPR64RegClass;
else if (AArch64::FPR64RegClass.contains(*I))
RC = &AArch64::FPR64RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
// FIXME: this currently does not emit CFI pseudo-instructions; it works
// fine for CXX_FAST_TLS since the C++-style TLS access functions should be
// nounwind. If we want to generalize this later, we may need to emit
// CFI pseudo-instructions.
assert(Entry->getParent()->getFunction().hasFnAttribute(
Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// Integer division on AArch64 is expensive. However, when aggressively
// optimizing for code size, we prefer to use a div instruction, as it is
// usually smaller than the alternative sequence.
// The exception to this is vector division. Since AArch64 doesn't have vector
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
// We want inc-of-add for scalars and sub-of-not for vectors.
return VT.isScalarInteger();
}
bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
EVT VT) const {
// v8f16 without fp16 needs to be extended to v8f32, which is more difficult
// to legalize.
if (FPVT == MVT::v8f16 && !Subtarget->hasFullFP16())
return false;
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
return getPointerTy(DL).getSizeInBits();
return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
}
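// For reference, the 3*64 + 2*32 bits above correspond to the AAPCS64
// va_list layout (Darwin and Windows instead use a single char* cursor):
//   struct va_list { void *__stack; void *__gr_top; void *__vr_top;
//                    int __gr_offs; int __vr_offs; };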
void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
// If we have any vulnerable SVE stack objects then the stack protector
// needs to be placed at the top of the SVE stack area, as the SVE locals
// are placed above the other locals, so we allocate it as if it were a
// scalable vector.
// FIXME: It may be worthwhile having a specific interface for this rather
// than doing it here in finalizeLowering.
if (MFI.hasStackProtectorIndex()) {
for (unsigned int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
if (MFI.getStackID(i) == TargetStackID::ScalableVector &&
MFI.getObjectSSPLayout(i) != MachineFrameInfo::SSPLK_None) {
MFI.setStackID(MFI.getStackProtectorIndex(),
TargetStackID::ScalableVector);
MFI.setObjectAlignment(MFI.getStackProtectorIndex(), Align(16));
break;
}
}
}
MFI.computeMaxCallFrameSize(MF);
TargetLoweringBase::finalizeLowering(MF);
}
// Unlike X86, we let frame lowering assign offsets to all catch objects.
bool AArch64TargetLowering::needsFixedCatchObjects() const {
return false;
}
bool AArch64TargetLowering::shouldLocalize(
const MachineInstr &MI, const TargetTransformInfo *TTI) const {
switch (MI.getOpcode()) {
case TargetOpcode::G_GLOBAL_VALUE: {
// On Darwin, TLS global vars get selected into function calls, which
// we don't want localized, as they can get moved into the middle of
// another call sequence.
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
break;
}
// If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
// localizable.
case AArch64::ADRP:
case AArch64::G_ADD_LOW:
return true;
default:
break;
}
return TargetLoweringBase::shouldLocalize(MI, TTI);
}
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
if (isa<ScalableVectorType>(Inst.getType()))
return true;
for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
return true;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isa<ScalableVectorType>(AI->getAllocatedType()))
return true;
}
return false;
}
// Return the largest legal scalable vector type that matches VT's element type.
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE container");
case MVT::i8:
return EVT(MVT::nxv16i8);
case MVT::i16:
return EVT(MVT::nxv8i16);
case MVT::i32:
return EVT(MVT::nxv4i32);
case MVT::i64:
return EVT(MVT::nxv2i64);
case MVT::f16:
return EVT(MVT::nxv8f16);
case MVT::f32:
return EVT(MVT::nxv4f32);
case MVT::f64:
return EVT(MVT::nxv2f64);
}
}
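// For example, a legal v4i32 maps to the nxv4i32 container: the fixed-length
// value occupies the low 128 bits of the scalable register and the remainder
// (if any) is unused.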
// Return a PTRUE with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isFixedLengthVector() &&
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
Optional<unsigned> PgPattern =
getSVEPredPatternFromNumElements(VT.getVectorNumElements());
assert(PgPattern && "Unexpected element count for SVE predicate");
// For vectors that are exactly getMaxSVEVectorSizeInBits in size, we can use
// AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
if (MaxSVESize && MinSVESize == MaxSVESize &&
MaxSVESize == VT.getSizeInBits())
PgPattern = AArch64SVEPredPattern::all;
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unexpected element type for SVE predicate");
case MVT::i8:
MaskVT = MVT::nxv16i1;
break;
case MVT::i16:
case MVT::f16:
MaskVT = MVT::nxv8i1;
break;
case MVT::i32:
case MVT::f32:
MaskVT = MVT::nxv4i1;
break;
case MVT::i64:
case MVT::f64:
MaskVT = MVT::nxv2i1;
break;
}
return getPTrue(DAG, DL, MaskVT, *PgPattern);
}
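// For example (a sketch): a v8i32 input yields PTRUE <Pg>.s, vl8; if the
// subtarget guarantees exactly a 256-bit SVE register, the whole register is
// covered, so PTRUE <Pg>.s, all is used instead, enabling unpredicated
// instruction variants.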
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
EVT VT) {
assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal scalable vector!");
auto PredTy = VT.changeVectorElementType(MVT::i1);
return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
}
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
if (VT.isFixedLengthVector())
return getPredicateForFixedLengthVector(DAG, DL, VT);
return getPredicateForScalableVector(DAG, DL, VT);
}
// Grow V to consume an entire SVE register.
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isScalableVector() &&
"Expected to convert into a scalable vector!");
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
assert(VT.isFixedLengthVector() &&
"Expected to convert into a fixed length vector!");
assert(V.getValueType().isScalableVector() &&
"Expected a scalable vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
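// A sketch of the round trip between the two representations, assuming a
// v4i32 value V and its nxv4i32 container:
//   Scalable = INSERT_SUBVECTOR undef:nxv4i32, V, 0    // grow
//   Fixed    = EXTRACT_SUBVECTOR Scalable, 0           // shrink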
// Convert all fixed length vector loads larger than NEON to masked_loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<LoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT LoadVT = ContainerVT;
EVT MemVT = Load->getMemoryVT();
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
LoadVT = ContainerVT.changeTypeToInteger();
MemVT = MemVT.changeTypeToInteger();
}
SDValue NewLoad = DAG.getMaskedLoad(
LoadVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(), Pg,
DAG.getUNDEF(LoadVT), MemVT, Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
SDValue Result = NewLoad;
if (VT.isFloatingPoint() && Load->getExtensionType() == ISD::EXTLOAD) {
EVT ExtendVT = ContainerVT.changeVectorElementType(
Load->getMemoryVT().getVectorElementType());
Result = getSVESafeBitCast(ExtendVT, Result, DAG);
Result = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Result, DAG.getUNDEF(ContainerVT));
}
Result = convertFromScalableVector(DAG, VT, Result);
SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
return DAG.getMergeValues(MergedValues, DL);
}
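// A sketch of the transformation above, assuming v8i32 (container nxv4i32):
//   (v8i32 (load p))
//     -> NewLoad = masked_load nxv4i32 p, ptrue(vl8), undef
//     -> merge_values (extract_subvector NewLoad, 0), NewLoad.chain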
static SDValue convertFixedMaskToScalableVector(SDValue Mask,
SelectionDAG &DAG) {
SDLoc DL(Mask);
EVT InVT = Mask.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
if (ISD::isBuildVectorAllOnes(Mask.getNode()))
return Pg;
auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
auto Op2 = DAG.getConstant(0, DL, ContainerVT);
return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, Pg.getValueType(),
{Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
}
// Convert fixed length vector masked loads larger than NEON to SVE masked
// loads.
SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
SDValue PassThru;
bool IsPassThruZeroOrUndef = false;
if (Load->getPassThru()->isUndef()) {
PassThru = DAG.getUNDEF(ContainerVT);
IsPassThruZeroOrUndef = true;
} else {
if (ContainerVT.isInteger())
PassThru = DAG.getConstant(0, DL, ContainerVT);
else
PassThru = DAG.getConstantFP(0, DL, ContainerVT);
if (isZerosVector(Load->getPassThru().getNode()))
IsPassThruZeroOrUndef = true;
}
SDValue NewLoad = DAG.getMaskedLoad(
ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
Load->getAddressingMode(), Load->getExtensionType());
SDValue Result = NewLoad;
if (!IsPassThruZeroOrUndef) {
SDValue OldPassThru =
convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
Result = DAG.getSelect(DL, ContainerVT, Mask, Result, OldPassThru);
}
Result = convertFromScalableVector(DAG, VT, Result);
SDValue MergedValues[2] = {Result, NewLoad.getValue(1)};
return DAG.getMergeValues(MergedValues, DL);
}
// Convert all fixed length vector stores larger than NEON to masked_stores.
SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Store = cast<StoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT MemVT = Store->getMemoryVT();
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
EVT TruncVT = ContainerVT.changeVectorElementType(
Store->getMemoryVT().getVectorElementType());
MemVT = MemVT.changeTypeToInteger();
NewValue = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, TruncVT, Pg,
NewValue, DAG.getTargetConstant(0, DL, MVT::i64),
DAG.getUNDEF(TruncVT));
NewValue =
getSVESafeBitCast(ContainerVT.changeTypeToInteger(), NewValue, DAG);
}
return DAG.getMaskedStore(Store->getChain(), DL, NewValue,
Store->getBasePtr(), Store->getOffset(), Pg, MemVT,
Store->getMemOperand(), Store->getAddressingMode(),
Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto *Store = cast<MaskedStoreSDNode>(Op);
SDLoc DL(Op);
EVT VT = Store->getValue().getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
return DAG.getMaskedStore(
Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
Mask, Store->getMemoryVT(), Store->getMemOperand(),
Store->getAddressingMode(), Store->isTruncatingStore());
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
bool Signed = Op.getOpcode() == ISD::SDIV;
unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
bool Negated;
uint64_t SplatVal;
if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
SDValue Res = DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
if (Negated)
Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
return convertFromScalableVector(DAG, VT, Res);
}
// Scalable vector i32/i64 DIV is supported.
if (EltVT == MVT::i32 || EltVT == MVT::i64)
return LowerToPredicatedOp(Op, DAG, PredOpcode);
// Scalable vector i8/i16 DIV is not supported. Promote it to i32.
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
// If this is not a full vector, extend, div, and truncate it.
EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
}
// Convert the operands to scalable vectors.
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
// Extend the scalable operands.
unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
// Convert back to fixed vectors so the DIV can be further lowered.
Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Lo, Op1Lo);
SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
Op0Hi, Op1Hi);
// Convert again to scalable vectors to truncate.
ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
ResultLo, ResultHi);
return convertFromScalableVector(DAG, VT, ScalableResult);
}
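// As a concrete example of the unpack path above (taken when the widened
// type is not itself legal): a v16i8 sdiv is split with SUNPKLO/SUNPKHI into
// two v8i16 halves, each half recurses through this lowering until the legal
// i32 DIV is reached, and the two results are renarrowed with UZP1.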
SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
// Repeatedly unpack Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv16i8:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
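// For example, zero-extending from i32 elements to i64 (container nxv4i32)
// takes a single UUNPKLO step: nxv4i32 -> nxv2i64, after which the result is
// shrunk back to the fixed-length type.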
SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
Val = convertToScalableVector(DAG, ContainerVT, Val);
// Repeatedly truncate Val until the result is of the desired element type.
switch (ContainerVT.getSimpleVT().SimpleTy) {
default:
llvm_unreachable("unimplemented container type");
case MVT::nxv2i64:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
if (VT.getVectorElementType() == MVT::i32)
break;
LLVM_FALLTHROUGH;
case MVT::nxv4i32:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
if (VT.getVectorElementType() == MVT::i16)
break;
LLVM_FALLTHROUGH;
case MVT::nxv8i16:
Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
break;
}
return convertFromScalableVector(DAG, VT, Val);
}
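// For example, truncating i64 elements to i32: the nxv2i64 value is bitcast
// to nxv4i32 and UZP1 keeps the even-numbered elements, i.e. the low half of
// each original i64 lane (little-endian layout).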
SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT InVT = Op.getOperand(0).getValueType();
assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
}
SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
Op.getOperand(1), Op.getOperand(2));
return convertFromScalableVector(DAG, VT, ScalableRes);
}
// Convert vector operation 'Op' to an equivalent predicated operation whereby
// the original operation's type is used to construct a suitable predicate.
// NOTE: The results for inactive lanes are undefined.
SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SelectionDAG &DAG,
unsigned NewOp) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
auto Pg = getPredicateForVector(DAG, DL, VT);
if (VT.isFixedLengthVector()) {
assert(isTypeLegal(VT) && "Expected only legal fixed-width types");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
if (isa<CondCodeSDNode>(V)) {
Operands.push_back(V);
continue;
}
if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
EVT VTArg = VTNode->getVT().getVectorElementType();
EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
Operands.push_back(DAG.getValueType(NewVTArg));
continue;
}
assert(isTypeLegal(V.getValueType()) &&
"Expected only legal fixed-width types");
Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(ContainerVT));
auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
SmallVector<SDValue, 4> Operands = {Pg};
for (const SDValue &V : Op->op_values()) {
assert((!V.getValueType().isVector() ||
V.getValueType().isScalableVector()) &&
"Only scalable vectors are supported!");
Operands.push_back(V);
}
if (isMergePassthruOpcode(NewOp))
Operands.push_back(DAG.getUNDEF(VT));
return DAG.getNode(NewOp, DL, VT, Operands, Op->getFlags());
}
// If a fixed length vector operation has no side effects when applied to
// undefined elements, we can safely use scalable vectors to perform the same
// operation without needing to worry about predication.
SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(useSVEForFixedLengthVectorVT(VT) &&
"Only expected to lower fixed length vector operation!");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
// Create list of operands by converting existing ones to scalable types.
SmallVector<SDValue, 4> Ops;
for (const SDValue &V : Op->op_values()) {
assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
// Pass through non-vector operands.
if (!V.getValueType().isVector()) {
Ops.push_back(V);
continue;
}
// "cast" fixed length vector to a scalable vector.
assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue AccOp = ScalarOp.getOperand(0);
SDValue VecOp = ScalarOp.getOperand(1);
EVT SrcVT = VecOp.getValueType();
EVT ResVT = SrcVT.getVectorElementType();
EVT ContainerVT = SrcVT;
if (SrcVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
// Convert operands to Scalable.
AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), AccOp, Zero);
// Perform reduction.
SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
Pg, AccOp, VecOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
}
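// A sketch of the sequence emitted above for vecreduce_seq_fadd(Acc, v4f32 V):
//   Acc0 = INSERT_VECTOR_ELT undef:nxv4f32, Acc, 0
//   Rdx  = FADDA_PRED ptrue(vl4), Acc0, V'   // strictly-ordered accumulate
//   Res  = EXTRACT_VECTOR_ELT Rdx, 0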
SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
SelectionDAG &DAG) const {
SDLoc DL(ReduceOp);
SDValue Op = ReduceOp.getOperand(0);
EVT OpVT = Op.getValueType();
EVT VT = ReduceOp.getValueType();
if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
return SDValue();
SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
switch (ReduceOp.getOpcode()) {
default:
return SDValue();
case ISD::VECREDUCE_OR:
if (isAllActivePredicate(DAG, Pg) && OpVT == MVT::nxv16i1)
// The predicate can be 'Op' because
// vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
else
return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
case ISD::VECREDUCE_AND: {
Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
}
case ISD::VECREDUCE_XOR: {
SDValue ID =
DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
if (OpVT == MVT::nxv1i1) {
// Emulate a CNTP on .Q using .D and a different governing predicate.
Pg = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Pg);
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv2i1, Op);
}
SDValue Cntp =
DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
}
}
return SDValue();
}
SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
SDValue ScalarOp,
SelectionDAG &DAG) const {
SDLoc DL(ScalarOp);
SDValue VecOp = ScalarOp.getOperand(0);
EVT SrcVT = VecOp.getValueType();
if (useSVEForFixedLengthVectorVT(
SrcVT,
/*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors())) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
}
// UADDV always returns an i64 result.
EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
SrcVT.getVectorElementType();
EVT RdxVT = SrcVT;
if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
RdxVT = getPackedSVEVectorVT(ResVT);
SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
Rdx, DAG.getConstant(0, DL, MVT::i64));
// The VEC_REDUCE nodes expect an element-sized result.
if (ResVT != ScalarOp.getValueType())
Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
return Res;
}
SDValue
AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT InVT = Op.getOperand(1).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
// Convert the mask to a predicate. (NOTE: We don't need to worry about
// inactive lanes since VSELECT is safe when given undefined elements.)
EVT MaskVT = Op.getOperand(0).getValueType();
EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
Mask = DAG.getNode(ISD::TRUNCATE, DL,
MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
Mask, Op1, Op2);
return convertFromScalableVector(DAG, VT, ScalableRes);
}
SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getOperand(0).getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
assert(useSVEForFixedLengthVectorVT(InVT) &&
"Only expected to lower fixed length vector operation!");
assert(Op.getValueType() == InVT.changeTypeToInteger() &&
"Expected integer result of the same bit length as the inputs!");
auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
EVT CmpVT = Pg.getValueType();
auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
{Pg, Op1, Op2, Op.getOperand(2)});
EVT PromoteVT = ContainerVT.changeTypeToInteger();
auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
return convertFromScalableVector(DAG, Op.getValueType(), Promote);
}
SDValue
AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
auto SrcOp = Op.getOperand(0);
EVT VT = Op.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT =
getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
return convertFromScalableVector(DAG, VT, Op);
}
SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
unsigned NumOperands = Op->getNumOperands();
assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
"Unexpected number of operands in CONCAT_VECTORS");
auto SrcOp1 = Op.getOperand(0);
auto SrcOp2 = Op.getOperand(1);
EVT VT = Op.getValueType();
EVT SrcVT = SrcOp1.getValueType();
if (NumOperands > 2) {
SmallVector<SDValue, 4> Ops;
EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
for (unsigned I = 0; I < NumOperands; I += 2)
Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
Op->getOperand(I), Op->getOperand(I + 1)));
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
}
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
return convertFromScalableVector(DAG, VT, Op);
}
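// For example, (concat_vectors v4i32 A, v4i32 B) becomes
//   SPLICE ptrue(vl4), A', B'
// where the four active lanes take all of A and the elements of B are
// appended immediately after them.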
SDValue
AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
SDValue Pg = getPredicateForVector(DAG, DL, VT);
EVT SrcVT = Val.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
EVT ExtendVT = ContainerVT.changeVectorElementType(
SrcVT.getVectorElementType());
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
Val = getSVESafeBitCast(ExtendVT, Val, DAG);
Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
Pg, Val, DAG.getUNDEF(ContainerVT));
return convertFromScalableVector(DAG, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
EVT RoundVT = ContainerSrcVT.changeVectorElementType(
VT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
Op.getOperand(1), DAG.getUNDEF(RoundVT));
Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
SDValue
AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
: AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
VT.changeTypeToInteger(), Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
// It is safe to use a larger-than-specified operand because we have just
// unpacked the data, hence the upper bits are zero.
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeVectorElementType(
ContainerDstVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
Val = convertFromScalableVector(DAG, SrcVT, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::BITCAST, DL, VT, Val);
}
}
SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
: AArch64ISD::FCVTZU_MERGE_PASSTHRU;
SDLoc DL(Op);
SDValue Val = Op.getOperand(0);
EVT SrcVT = Val.getValueType();
EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
ContainerDstVT.getVectorElementType().getSizeInBits()) {
EVT CvtVT = ContainerDstVT.changeVectorElementType(
ContainerSrcVT.getVectorElementType());
SDValue Pg = getPredicateForVector(DAG, DL, VT);
Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = getSVESafeBitCast(CvtVT, Val, DAG);
Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
DAG.getUNDEF(ContainerDstVT));
return convertFromScalableVector(DAG, VT, Val);
} else {
EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
// It is safe to use a larger-than-specified result since an fp_to_int whose
// result doesn't fit into the destination is undefined.
Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
}
}
SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
auto ShuffleMask = SVN->getMask();
SDLoc DL(Op);
SDValue Op1 = Op.getOperand(0);
SDValue Op2 = Op.getOperand(1);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
bool ReverseEXT = false;
unsigned Imm;
if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
Imm == VT.getVectorNumElements() - 1) {
if (ReverseEXT)
std::swap(Op1, Op2);
EVT ScalarTy = VT.getVectorElementType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
ScalarTy = MVT::i32;
SDValue Scalar = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
return convertFromScalableVector(DAG, VT, Op);
}
for (unsigned LaneSize : {64U, 32U, 16U}) {
if (isREVMask(ShuffleMask, VT, LaneSize)) {
EVT NewVT =
getPackedSVEVectorVT(EVT::getIntegerVT(*DAG.getContext(), LaneSize));
unsigned RevOp;
unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 8)
RevOp = AArch64ISD::BSWAP_MERGE_PASSTHRU;
else if (EltSz == 16)
RevOp = AArch64ISD::REVH_MERGE_PASSTHRU;
else
RevOp = AArch64ISD::REVW_MERGE_PASSTHRU;
Op = DAG.getNode(ISD::BITCAST, DL, NewVT, Op1);
Op = LowerToPredicatedOp(Op, DAG, RevOp);
Op = DAG.getNode(ISD::BITCAST, DL, ContainerVT, Op);
return convertFromScalableVector(DAG, VT, Op);
}
}
unsigned WhichResult;
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op2));
if (isTRNMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult == 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP1, DL, ContainerVT, Op1, Op1));
if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}
// Functions like isZIPMask return true when an ISD::VECTOR_SHUFFLE's mask
// represents the same logical operation as performed by a ZIP instruction. In
// isolation these functions do not mean the ISD::VECTOR_SHUFFLE is exactly
// equivalent to an AArch64 instruction. There's the extra component of
// ISD::VECTOR_SHUFFLE's value type to consider. Prior to SVE these functions
// only operated on 64/128-bit vector types that have a direct mapping to a
// target register, and so an exact mapping is implied.
// However, when using SVE for fixed length vectors, most legal vector types
// are actually sub-vectors of a larger SVE register. When mapping
// ISD::VECTOR_SHUFFLE to an SVE instruction, care must be taken to consider
// how the mask's indices translate. Specifically, when the mapping requires
// an exact meaning for a specific vector index (e.g. index X is the last
// vector element in the register) then such mappings are often only safe when
// the exact SVE register size is known. The main exception to this is when
// indices are logically relative to the first element of either
// ISD::VECTOR_SHUFFLE operand because these relative indices don't change
// when converting from fixed-length to scalable vector types (i.e. the start
// of a fixed length vector is always the start of a scalable vector).
unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
unsigned MaxSVESize = Subtarget->getMaxSVEVectorSizeInBits();
if (MinSVESize == MaxSVESize && MaxSVESize == VT.getSizeInBits()) {
if (ShuffleVectorInst::isReverseMask(ShuffleMask) && Op2.isUndef()) {
Op = DAG.getNode(ISD::VECTOR_REVERSE, DL, ContainerVT, Op1);
return convertFromScalableVector(DAG, VT, Op);
}
if (isZIPMask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op2));
if (isUZPMask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op2));
}
if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult) && WhichResult != 0)
return convertFromScalableVector(
DAG, VT, DAG.getNode(AArch64ISD::ZIP2, DL, ContainerVT, Op1, Op1));
if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
return convertFromScalableVector(
DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
}
}
return SDValue();
}
SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT InVT = Op.getValueType();
assert(VT.isScalableVector() && isTypeLegal(VT) &&
InVT.isScalableVector() && isTypeLegal(InVT) &&
"Only expect to cast between legal scalable vector types!");
assert(VT.getVectorElementType() != MVT::i1 &&
InVT.getVectorElementType() != MVT::i1 &&
"For predicate bitcasts, use getSVEPredicateBitCast");
if (InVT == VT)
return Op;
EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
// Safe bitcasting between unpacked vector types of different element counts
// is currently unsupported because the code below is missing the work needed
// to ensure the result's elements live where they're supposed to within an
// SVE register.
// 01234567
// e.g. nxv2i32 = XX??XX??
// nxv4f16 = X?X?X?X?
assert((VT.getVectorElementCount() == InVT.getVectorElementCount() ||
VT == PackedVT || InVT == PackedInVT) &&
"Unexpected bitcast!");
// Pack input if required.
if (InVT != PackedInVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
// Unpack result if required.
if (VT != PackedVT)
Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
return Op;
}
bool AArch64TargetLowering::isAllActivePredicate(SelectionDAG &DAG,
SDValue N) const {
return ::isAllActivePredicate(DAG, N);
}
EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
return ::getPromotedVTForPredicate(VT);
}
bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op, const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
unsigned Depth) const {
unsigned Opc = Op.getOpcode();
switch (Opc) {
case AArch64ISD::VSHL: {
// Match (VSHL (VLSHR Val X) X)
SDValue ShiftL = Op;
SDValue ShiftR = Op->getOperand(0);
if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
return false;
if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
return false;
unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
// Other cases can be handled as well, but this is not
// implemented.
if (ShiftRBits != ShiftLBits)
return false;
unsigned ScalarSize = Op.getScalarValueSizeInBits();
assert(ScalarSize > ShiftLBits && "Invalid shift imm");
APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
APInt UnusedBits = ~OriginalDemandedBits;
if ((ZeroBits & UnusedBits) != ZeroBits)
return false;
// All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
// used - simplify to just Val.
return TLO.CombineTo(Op, ShiftR->getOperand(0));
}
}
return TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
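// For example, (VSHL (VLSHR X, 8), 8) forces the low 8 bits of every lane to
// zero; if no user demands any of those low bits, the shift pair is redundant
// and the node simplifies to X itself.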
bool AArch64TargetLowering::isTargetCanonicalConstantNode(SDValue Op) const {
return Op.getOpcode() == AArch64ISD::DUP ||
(Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
Op.getOperand(0).getOpcode() == AArch64ISD::DUP) ||
TargetLowering::isTargetCanonicalConstantNode(Op);
}
bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
unsigned Opc, LLT Ty1, LLT Ty2) const {
return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1ba2e2f315ec..ff3bfe897869 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1,1175 +1,1175 @@
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#include "AArch64.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"
namespace llvm {
namespace AArch64ISD {
// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value. Please stick to the following naming convention:
//
// _MERGE_OP<n> The result value is a vector with inactive lanes equal
// to source operand OP<n>.
//
// _MERGE_ZERO The result value is a vector with inactive lanes
// actively zeroed.
//
// _MERGE_PASSTHRU The result value is a vector with inactive lanes equal
// to the last source operand, whose only purpose is to be
// a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
// _PRED
//
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
CALL, // Function call.
// Pseudo for an ObjC call that gets emitted together with a special `mov
// x29, x29` marker instruction.
CALL_RVMARKER,
CALL_BTI, // Function call followed by a BTI instruction.
// Produces the full sequence of instructions for getting the thread pointer
// offset of a variable into X0, using the TLSDesc model.
TLSDESC_CALLSEQ,
ADRP, // Page address of a TargetGlobalAddress operand.
ADR, // ADR
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
BRCOND, // Conditional branch instruction; "b.cond".
CSEL,
CSINV, // Conditional select invert.
CSNEG, // Conditional select negate.
CSINC, // Conditional select increment.
// Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
// ELF.
THREAD_POINTER,
ADC,
SBC, // adc, sbc instructions
// Predicated instructions where inactive lanes produce undefined results.
ABDS_PRED,
ABDU_PRED,
FADD_PRED,
FDIV_PRED,
FMA_PRED,
FMAX_PRED,
FMAXNM_PRED,
FMIN_PRED,
FMINNM_PRED,
FMUL_PRED,
FSUB_PRED,
MUL_PRED,
MULHS_PRED,
MULHU_PRED,
SDIV_PRED,
SHL_PRED,
SMAX_PRED,
SMIN_PRED,
SRA_PRED,
SRL_PRED,
UDIV_PRED,
UMAX_PRED,
UMIN_PRED,
// Unpredicated vector instructions
BIC,
SRAD_MERGE_OP1,
// Predicated instructions with the result of inactive lanes provided by the
// last operand.
FABS_MERGE_PASSTHRU,
FCEIL_MERGE_PASSTHRU,
FFLOOR_MERGE_PASSTHRU,
FNEARBYINT_MERGE_PASSTHRU,
FNEG_MERGE_PASSTHRU,
FRECPX_MERGE_PASSTHRU,
FRINT_MERGE_PASSTHRU,
FROUND_MERGE_PASSTHRU,
FROUNDEVEN_MERGE_PASSTHRU,
FSQRT_MERGE_PASSTHRU,
FTRUNC_MERGE_PASSTHRU,
FP_ROUND_MERGE_PASSTHRU,
FP_EXTEND_MERGE_PASSTHRU,
UINT_TO_FP_MERGE_PASSTHRU,
SINT_TO_FP_MERGE_PASSTHRU,
FCVTZU_MERGE_PASSTHRU,
FCVTZS_MERGE_PASSTHRU,
SIGN_EXTEND_INREG_MERGE_PASSTHRU,
ZERO_EXTEND_INREG_MERGE_PASSTHRU,
ABS_MERGE_PASSTHRU,
NEG_MERGE_PASSTHRU,
SETCC_MERGE_ZERO,
// Arithmetic instructions which write flags.
ADDS,
SUBS,
ADCS,
SBCS,
ANDS,
// Conditional compares. Operands: left,right,falsecc,cc,flags
CCMP,
CCMN,
FCCMP,
// Floating point comparison
FCMP,
// Scalar extract
EXTR,
// Scalar-to-vector duplication
DUP,
DUPLANE8,
DUPLANE16,
DUPLANE32,
DUPLANE64,
DUPLANE128,
// Vector immediate moves
MOVI,
MOVIshift,
MOVIedit,
MOVImsl,
FMOV,
MVNIshift,
MVNImsl,
// Vector immediate ops
BICi,
ORRi,
// Vector bitwise select: similar to ISD::VSELECT but not all bits within an
// element must be identical.
BSP,
// Vector shuffles
ZIP1,
ZIP2,
UZP1,
UZP2,
TRN1,
TRN2,
REV16,
REV32,
REV64,
EXT,
SPLICE,
// Vector shift by scalar
VSHL,
VLSHR,
VASHR,
// Vector saturating/rounding shift by immediate
SQSHL_I,
UQSHL_I,
SQSHLU_I,
SRSHR_I,
URSHR_I,
// Vector shift by constant and insert
VSLI,
VSRI,
// Vector comparisons
CMEQ,
CMGE,
CMGT,
CMHI,
CMHS,
FCMEQ,
FCMGE,
FCMGT,
// Vector zero comparisons
CMEQz,
CMGEz,
CMGTz,
CMLEz,
CMLTz,
FCMEQz,
FCMGEz,
FCMGTz,
FCMLEz,
FCMLTz,
// Vector across-lanes addition
// Only the lower result lane is defined.
SADDV,
UADDV,
// Add Pairwise of two vectors
ADDP,
// Add Long Pairwise
SADDLP,
UADDLP,
// udot/sdot instructions
UDOT,
SDOT,
// Vector across-lanes min/max
// Only the lower result lane is defined.
SMINV,
UMINV,
SMAXV,
UMAXV,
SADDV_PRED,
UADDV_PRED,
SMAXV_PRED,
UMAXV_PRED,
SMINV_PRED,
UMINV_PRED,
ORV_PRED,
EORV_PRED,
ANDV_PRED,
// Vector bitwise insertion
BIT,
// Compare-and-branch
CBZ,
CBNZ,
TBZ,
TBNZ,
// Tail calls
TC_RETURN,
// Custom prefetch handling
PREFETCH,
// {s|u}int to FP within a FP register.
SITOF,
UITOF,
/// Natural vector cast. ISD::BITCAST is not natural in the big-endian
/// world w.r.t. vectors, which causes additional REV instructions to be
/// generated to compensate for the byte-swapping. But sometimes we do
/// need to re-interpret the data in SIMD vector registers in big-endian
/// mode without emitting such REV instructions.
NVCAST,
MRS, // MRS, also sets the flags via a glue.
SMULL,
UMULL,
// Reciprocal estimates and steps.
FRECPE,
FRECPS,
FRSQRTE,
FRSQRTS,
SUNPKHI,
SUNPKLO,
UUNPKHI,
UUNPKLO,
CLASTA_N,
CLASTB_N,
LASTA,
LASTB,
TBL,
// Floating-point reductions.
FADDA_PRED,
FADDV_PRED,
FMAXV_PRED,
FMAXNMV_PRED,
FMINV_PRED,
FMINNMV_PRED,
INSR,
PTEST,
PTRUE,
BITREVERSE_MERGE_PASSTHRU,
BSWAP_MERGE_PASSTHRU,
REVH_MERGE_PASSTHRU,
REVW_MERGE_PASSTHRU,
CTLZ_MERGE_PASSTHRU,
CTPOP_MERGE_PASSTHRU,
DUP_MERGE_PASSTHRU,
INDEX_VECTOR,
// Cast between vectors that have the same element type but differ in length.
REINTERPRET_CAST,
// Nodes to build an LD64B / ST64B 64-byte quantity out of i64s, and vice versa
LS64_BUILD,
LS64_EXTRACT,
LD1_MERGE_ZERO,
LD1S_MERGE_ZERO,
LDNF1_MERGE_ZERO,
LDNF1S_MERGE_ZERO,
LDFF1_MERGE_ZERO,
LDFF1S_MERGE_ZERO,
LD1RQ_MERGE_ZERO,
LD1RO_MERGE_ZERO,
// Structured loads.
SVE_LD2_MERGE_ZERO,
SVE_LD3_MERGE_ZERO,
SVE_LD4_MERGE_ZERO,
// Unsigned gather loads.
GLD1_MERGE_ZERO,
GLD1_SCALED_MERGE_ZERO,
GLD1_UXTW_MERGE_ZERO,
GLD1_SXTW_MERGE_ZERO,
GLD1_UXTW_SCALED_MERGE_ZERO,
GLD1_SXTW_SCALED_MERGE_ZERO,
GLD1_IMM_MERGE_ZERO,
// Signed gather loads
GLD1S_MERGE_ZERO,
GLD1S_SCALED_MERGE_ZERO,
GLD1S_UXTW_MERGE_ZERO,
GLD1S_SXTW_MERGE_ZERO,
GLD1S_UXTW_SCALED_MERGE_ZERO,
GLD1S_SXTW_SCALED_MERGE_ZERO,
GLD1S_IMM_MERGE_ZERO,
// Unsigned gather loads.
GLDFF1_MERGE_ZERO,
GLDFF1_SCALED_MERGE_ZERO,
GLDFF1_UXTW_MERGE_ZERO,
GLDFF1_SXTW_MERGE_ZERO,
GLDFF1_UXTW_SCALED_MERGE_ZERO,
GLDFF1_SXTW_SCALED_MERGE_ZERO,
GLDFF1_IMM_MERGE_ZERO,
// Signed gather loads.
GLDFF1S_MERGE_ZERO,
GLDFF1S_SCALED_MERGE_ZERO,
GLDFF1S_UXTW_MERGE_ZERO,
GLDFF1S_SXTW_MERGE_ZERO,
GLDFF1S_UXTW_SCALED_MERGE_ZERO,
GLDFF1S_SXTW_SCALED_MERGE_ZERO,
GLDFF1S_IMM_MERGE_ZERO,
// Non-temporal gather loads
GLDNT1_MERGE_ZERO,
GLDNT1_INDEX_MERGE_ZERO,
GLDNT1S_MERGE_ZERO,
// Contiguous masked store.
ST1_PRED,
// Scatter store
SST1_PRED,
SST1_SCALED_PRED,
SST1_UXTW_PRED,
SST1_SXTW_PRED,
SST1_UXTW_SCALED_PRED,
SST1_SXTW_SCALED_PRED,
SST1_IMM_PRED,
// Non-temporal scatter store
SSTNT1_PRED,
SSTNT1_INDEX_PRED,
// SME
RDSVL,
REVD_MERGE_PASSTHRU,
// Asserts that a function argument (i32) is zero-extended to i8 by
// the caller
ASSERT_ZEXT_BOOL,
// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
LD4post,
ST2post,
ST3post,
ST4post,
LD1x2post,
LD1x3post,
LD1x4post,
ST1x2post,
ST1x3post,
ST1x4post,
LD1DUPpost,
LD2DUPpost,
LD3DUPpost,
LD4DUPpost,
LD1LANEpost,
LD2LANEpost,
LD3LANEpost,
LD4LANEpost,
ST2LANEpost,
ST3LANEpost,
ST4LANEpost,
STG,
STZG,
ST2G,
STZ2G,
LDP,
STP,
STNP,
// Memory Operations
MOPS_MEMSET,
MOPS_MEMSET_TAGGING,
MOPS_MEMCOPY,
MOPS_MEMMOVE,
};
} // end namespace AArch64ISD
namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
RN = 0, // Round to Nearest
RP = 1, // Round towards Plus infinity
RM = 2, // Round towards Minus infinity
RZ = 3, // Round towards Zero
rmMask = 3 // Bit mask selecting rounding mode
};
// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
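// A minimal sketch of decoding the current mode from a raw FPCR value (a
// hypothetical helper, not part of this interface):
//   Rounding getRoundingMode(uint64_t FPCR) {
//     return static_cast<Rounding>((FPCR >> RoundingBitsPos) & rmMask);
//   }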
} // namespace AArch64
class AArch64Subtarget;
class AArch64TargetLowering : public TargetLowering {
public:
explicit AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI);
/// Control the following reassociation of operands: (op (op x, c1), y) -> (op
/// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const override;
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
// Returning i64 unconditionally here (i.e. even for ILP32) means that the
// *DAG* representation of pointers will always be 64-bits. They will be
// truncated and extended when transferred to memory, but the 64-bit DAG
// allows us to use AArch64's addressing modes much more easily.
return MVT::getIntegerVT(64);
}
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
const APInt &DemandedElts,
TargetLoweringOpt &TLO) const override;
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
/// Returns true if the target allows unaligned memory accesses of the
/// specified type.
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
bool *Fast = nullptr) const override;
/// LLT variant.
bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
Align Alignment,
MachineMemOperand::Flags Flags,
bool *Fast = nullptr) const override;
/// Provide custom lowering hooks for some operations.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
/// This method returns a target specific FastISel object, or null if the
/// target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
/// Return true if the given shuffle mask can be codegen'd directly, or if it
/// should be stack expanded.
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
/// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
/// shuffle mask can be codegen'd directly.
bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;
/// Return the ISD::SETCC ValueType.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
bool isProfitableToHoist(Instruction *I) const override;
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
bool lowerInterleavedLoad(LoadInst *LI,
ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices,
unsigned Factor) const override;
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
bool isMulAddWithConstProfitable(SDValue AddNode,
SDValue ConstNode) const override;
bool shouldConsiderGEPOffsetSplit() const override;
EVT getOptimalMemOpType(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
LLT getOptimalMemOpLLT(const MemOp &Op,
const AttributeList &FuncAttributes) const override;
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS,
Instruction *I = nullptr) const override;
/// Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
/// If the AM is supported, the return value must be >= 0.
/// If the AM is not supported, it returns a negative value.
InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
/// Return true if an FMA operation is faster than a pair of fmul and fadd
/// instructions. fmuladd intrinsics will be expanded to FMAs when this method
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
bool generateFMAsInMachineCombiner(EVT VT,
CodeGenOpt::Level OptLevel) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;
/// Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
/// Return true if EXTRACT_SUBVECTOR is cheap for this result type
/// with this index.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const override;
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool MathUsed) const override {
// Using overflow ops for overflow checks only should be beneficial on
// AArch64.
return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
}
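// Illustrative sketch (not part of the patch) of the kind of source pattern
// this favours: a checked add maps onto ISD::SADDO, which AArch64 can select
// as a single ADDS plus a flags read instead of a widen-and-compare sequence.
// The helper name is hypothetical; __builtin_add_overflow is a GCC/Clang
// builtin.
static bool sketchSignedAddOverflows(int A, int B, int &Sum) {
return __builtin_add_overflow(A, B, &Sum); // true iff the add wraps
}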
Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
AtomicOrdering Ord) const override;
Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
AtomicOrdering Ord) const override;
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
bool isOpSuitableForLDPSTP(const Instruction *I) const;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
bool useLoadStackGuardNode() const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(MVT VT) const override;
/// If the target has a standard location for the stack protector cookie,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
void insertSSPDeclarations(Module &M) const override;
Value *getSDagStackGuard(const Module &M) const override;
Function *getSSPStackGuardCheck(const Module &M) const override;
/// If the target has a standard location for the unsafe stack pointer,
/// returns the address of that location. Otherwise, returns nullptr.
Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
// FIXME: This is a guess. Has this been defined yet?
return AArch64::X0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
// FIXME: This is a guess. Has this been defined yet?
return AArch64::X1;
}
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const MachineFunction &MF) const override {
// Do not merge to float value size (128 bits) if no implicit
// float attribute is set.
bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat)
return (MemVT.getSizeInBits() <= 64);
return true;
}
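// A minimal standalone sketch of the gating logic above (illustrative, not
// part of the patch), assuming only the two inputs shown:
static bool sketchMayMergeStores(bool HasNoImplicitFloat, unsigned MergedBits) {
// Without implicit FP use we must not create the 128-bit FP/vector store a
// wider merge would need, so cap merged stores at 64 bits.
return !HasNoImplicitFloat || MergedBits <= 64;
}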
bool isCheapToSpeculateCttz() const override {
return true;
}
bool isCheapToSpeculateCtlz() const override {
return true;
}
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
bool hasAndNotCompare(SDValue V) const override {
// We can use bics for any scalar.
return V.getValueType().isScalarInteger();
}
bool hasAndNot(SDValue Y) const override {
EVT VT = Y.getValueType();
if (!VT.isVector())
return hasAndNotCompare(Y);
TypeSize TS = VT.getSizeInBits();
// TODO: We should be able to use bic/bif too for SVE.
return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
}
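// A minimal sketch (not part of the patch) of the combine hasAndNot enables:
// keeping the x & ~y form lets instruction selection emit a single BIC.
static uint64_t sketchBic(uint64_t X, uint64_t Y) {
return X & ~Y; // scalar BIC; NEON BIC handles 64/128-bit fixed vectors
}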
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override {
// For vectors, we don't have a preference.
if (XVT.isVector())
return false;
auto VTIsOk = [](EVT VT) -> bool {
return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
VT == MVT::i64;
};
// We are ok with KeptBitsVT being byte/word/dword, which is what SXT supports.
// XVT will be larger than KeptBitsVT.
MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
}
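// Worked example (illustrative, not part of the patch): with KeptBits == 8,
// an "X fits in a signed byte" range check is rewritten into the
// sign-extend-and-compare form that selects to SXTB + CMP.
static bool sketchFitsInSignedByte(int X) {
return static_cast<signed char>(X) == X; // sext(trunc X) == X
}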
bool preferIncOfAddToSubOfNot(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
}
bool supportSplitCSR(MachineFunction *MF) const override {
return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
}
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
void insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
bool supportSwiftError() const override {
return true;
}
/// Enable aggressive FMA fusion on targets that want it.
bool enableAggressiveFMAFusion(EVT VT) const override;
/// Returns the size of the platform's va_list object.
unsigned getVaListSizeInBits(const DataLayout &DL) const override;
/// Returns true if \p VecTy is a legal interleaved access type. This
/// function checks the vector element type and the overall width of the
/// vector.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
bool &UseScalable) const;
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
bool UseScalable) const;
MachineMemOperand::Flags getTargetMMOFlags(
const Instruction &I) const override;
bool functionArgumentNeedsConsecutiveRegisters(
Type *Ty, CallingConv::ID CallConv, bool isVarArg,
const DataLayout &DL) const override;
/// Used for exception handling on Win64.
bool needsFixedCatchObjects() const override;
bool fallBackToDAGISel(const Instruction &Inst) const override;
/// SVE code generation for fixed length vectors does not custom lower
/// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
/// illegal as the original, thus leading to an infinite legalisation loop.
/// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
/// vector types this override can be removed.
bool mergeStoresAfterLegalization(EVT VT) const override;
// If the platform/function should have a redzone, return the size in bytes.
unsigned getRedZoneSize(const Function &F) const {
if (F.hasFnAttribute(Attribute::NoRedZone))
return 0;
return 128;
}
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
EVT getPromotedVTForPredicate(EVT VT) const;
EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const override;
bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
bool isExtFreeImpl(const Instruction *Ext) const override;
void addTypeForNEON(MVT VT);
void addTypeForFixedLengthSVE(MVT VT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall(CallLoweringInfo & /*CLI*/,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SmallVectorImpl<CCValAssign> &RVLocs,
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
bool
isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;
/// Finds the incoming stack arguments which overlap the given fixed stack
/// object and incorporates their load into the current chain. This prevents
/// an upcoming store from clobbering the stack argument before it's used.
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
MachineFrameInfo &MFI, int ClobberedFI) const;
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) const;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const;
template <class NodeTy>
SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
template <class NodeTy>
SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const;
SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
SDValue &Size,
SelectionDAG &DAG) const;
SDValue LowerSVEStructLoad(unsigned Intrinsic, ArrayRef<SDValue> LoadOps,
EVT VT, SelectionDAG &DAG, const SDLoc &DL) const;
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
SelectionDAG &DAG) const;
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
SmallVectorImpl<SDNode *> &Created) const override;
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps, bool &UseOneConst,
bool Reciprocal) const override;
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const override;
SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
const DenormalMode &Mode) const override;
SDValue getSqrtResultForDenormInput(SDValue Operand,
SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
const char *LowerXConstraint(EVT ConstraintVT) const override;
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
if (ConstraintCode == "Q")
return InlineAsm::Constraint_Q;
// FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
// followed by llvm_unreachable so we'll leave them unimplemented in
// the backend for now.
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
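// Illustrative use of the "Q" constraint from C (a sketch, not part of the
// patch; it only compiles when targeting AArch64): "Q" pins the operand to a
// single-base-register address with no offset, as exclusive loads require.
static int sketchLoadExclusive(int *P) {
int V;
asm volatile("ldxr %w0, %1" : "=r"(V) : "Q"(*P));
return V;
}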
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM, bool &IsInc,
SelectionDAG &DAG) const;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
void ReplaceExtractSubVectorResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const;
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
void finalizeLowering(MachineFunction &MF) const override;
bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;
bool SimplifyDemandedBitsForTargetNode(SDValue Op,
const APInt &OriginalDemandedBits,
const APInt &OriginalDemandedElts,
KnownBits &Known,
TargetLoweringOpt &TLO,
unsigned Depth) const override;
bool isTargetCanonicalConstantNode(SDValue Op) const override;
// Normally SVE is only used for vectors whose byte size does not fit within
// a NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64-bit and 128-bit vectors as well.
bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;
// With the exception of data-predicate transitions, no instructions are
// required to cast between legal scalable vector types. However:
// 1. Packed and unpacked types have different bit lengths, meaning BITCAST
// is not universally usable.
// 2. Most unpacked integer types are not legal and thus integer extends
// cannot be used to convert between unpacked and packed types.
// These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
// to transition between unpacked and packed types of the same element type,
// with BITCAST used otherwise.
// This function does not handle predicate bitcasts.
SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
};
namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
} // end namespace AArch64
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f7d139adc63b..f6b7d1ffc6d2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1,12973 +1,12975 @@
//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for SI
//
//===----------------------------------------------------------------------===//
#include "SIISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
#define DEBUG_TYPE "si-lower"
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool> DisableLoopAlignment(
"amdgpu-disable-loop-alignment",
cl::desc("Do not align and prefetch loops"),
cl::init(false));
static cl::opt<bool> UseDivergentRegisterIndexing(
"amdgpu-use-divergent-register-indexing",
cl::Hidden,
cl::desc("Use indirect register addressing for divergent indexes"),
cl::init(false));
static bool hasFP32Denormals(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
return Info->getMode().allFP32Denormals();
}
static bool hasFP64FP16Denormals(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
return Info->getMode().allFP64FP16Denormals();
}
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
if (!CCInfo.isAllocated(AMDGPU::SGPR0 + Reg)) {
return AMDGPU::SGPR0 + Reg;
}
}
llvm_unreachable("Cannot allocate sgpr");
}
SITargetLowering::SITargetLowering(const TargetMachine &TM,
const GCNSubtarget &STI)
: AMDGPUTargetLowering(TM, STI),
Subtarget(&STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
const SIRegisterInfo *TRI = STI.getRegisterInfo();
const TargetRegisterClass *V64RegClass = TRI->getVGPR64Class();
addRegisterClass(MVT::f64, V64RegClass);
addRegisterClass(MVT::v2f32, V64RegClass);
addRegisterClass(MVT::v3i32, &AMDGPU::SGPR_96RegClass);
addRegisterClass(MVT::v3f32, TRI->getVGPRClassForBitWidth(96));
addRegisterClass(MVT::v2i64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v2f64, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4i32, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v4f32, TRI->getVGPRClassForBitWidth(128));
addRegisterClass(MVT::v5i32, &AMDGPU::SGPR_160RegClass);
addRegisterClass(MVT::v5f32, TRI->getVGPRClassForBitWidth(160));
addRegisterClass(MVT::v6i32, &AMDGPU::SGPR_192RegClass);
addRegisterClass(MVT::v6f32, TRI->getVGPRClassForBitWidth(192));
addRegisterClass(MVT::v3i64, &AMDGPU::SGPR_192RegClass);
addRegisterClass(MVT::v3f64, TRI->getVGPRClassForBitWidth(192));
addRegisterClass(MVT::v7i32, &AMDGPU::SGPR_224RegClass);
addRegisterClass(MVT::v7f32, TRI->getVGPRClassForBitWidth(224));
addRegisterClass(MVT::v8i32, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v8f32, TRI->getVGPRClassForBitWidth(256));
addRegisterClass(MVT::v4i64, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v4f64, TRI->getVGPRClassForBitWidth(256));
addRegisterClass(MVT::v16i32, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v16f32, TRI->getVGPRClassForBitWidth(512));
addRegisterClass(MVT::v8i64, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v8f64, TRI->getVGPRClassForBitWidth(512));
addRegisterClass(MVT::v16i64, &AMDGPU::SGPR_1024RegClass);
addRegisterClass(MVT::v16f64, TRI->getVGPRClassForBitWidth(1024));
if (Subtarget->has16BitInsts()) {
addRegisterClass(MVT::i16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::f16, &AMDGPU::SReg_32RegClass);
// Unless there are also VOP3P operations, no operations are really legal.
addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
addRegisterClass(MVT::v16i16, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v16f16, &AMDGPU::SGPR_256RegClass);
}
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
addRegisterClass(MVT::v32f32, TRI->getVGPRClassForBitWidth(1024));
computeRegisterProperties(Subtarget->getRegisterInfo());
// The boolean content concept here is too inflexible. Compares only ever
// really produce a 1-bit result. Any copy/extend from these will turn into a
// select, and zext/1 or sext/-1 are equally cheap. Arbitrarily choose 0/1, as
// it's what most targets use.
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
MVT::v32i32},
Custom);
setOperationAction(ISD::STORE,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32, MVT::i1,
MVT::v32i32},
Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v3i32, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Expand);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Expand);
setTruncStoreAction(MVT::v2i16, MVT::v2i8, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v16i16, MVT::v16i8, Expand);
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Expand);
setTruncStoreAction(MVT::v8i64, MVT::v8i8, Expand);
setTruncStoreAction(MVT::v8i64, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v8i64, MVT::v8i32, Expand);
setTruncStoreAction(MVT::v16i64, MVT::v16i32, Expand);
setOperationAction(ISD::GlobalAddress, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
setOperationAction(ISD::SELECT_CC,
{MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, {MVT::v2i1, MVT::v4i1}, Expand);
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
setOperationAction(ISD::TRUNCATE,
{MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32,
MVT::v6i32, MVT::v7i32, MVT::v8i32, MVT::v16i32},
Expand);
setOperationAction(ISD::FP_ROUND,
{MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG,
{MVT::v2i1, MVT::v4i1, MVT::v2i8, MVT::v4i8, MVT::v2i16,
MVT::v3i16, MVT::v4i16, MVT::Other},
Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::BR_CC,
{MVT::i1, MVT::i32, MVT::i64, MVT::f32, MVT::f64}, Expand);
setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);
setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i32, Legal);
setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i64,
Expand);
#if 0
setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal);
#endif
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
for (MVT VT :
{MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64,
MVT::v2f64, MVT::v4i16, MVT::v4f16, MVT::v3i64, MVT::v3f64,
MVT::v6i32, MVT::v6f32, MVT::v4i64, MVT::v4f64, MVT::v8i64,
MVT::v8f64, MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16,
MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
case ISD::STORE:
case ISD::BUILD_VECTOR:
case ISD::BITCAST:
+ case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR:
case ISD::SCALAR_TO_VECTOR:
break;
case ISD::INSERT_SUBVECTOR:
case ISD::CONCAT_VECTORS:
setOperationAction(Op, VT, Custom);
break;
default:
setOperationAction(Op, VT, Expand);
break;
}
}
}
setOperationAction(ISD::FP_EXTEND, MVT::v4f32, Expand);
// TODO: For dynamic 64-bit vector inserts/extracts, should emit a pseudo that
// is expanded to avoid having two separate loops in case the index is a VGPR.
// Most operations are naturally 32-bit vector operations. We only support
// load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32);
}
for (MVT Vec64 : { MVT::v3i64, MVT::v3f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v6i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v6i32);
}
for (MVT Vec64 : { MVT::v4i64, MVT::v4f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v8i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v8i32);
}
for (MVT Vec64 : { MVT::v8i64, MVT::v8f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v16i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v16i32);
}
for (MVT Vec64 : { MVT::v16i64, MVT::v16f64 }) {
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v32i32);
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v32i32);
}
setOperationAction(ISD::VECTOR_SHUFFLE,
{MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32},
Expand);
setOperationAction(ISD::BUILD_VECTOR, {MVT::v4f16, MVT::v4i16}, Custom);
// Avoid stack access for these.
// TODO: Generalize to more vector types.
setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
{MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v4i16, MVT::v4f16},
Custom);
// Deal with vec3 vector operations when widened to vec4.
setOperationAction(ISD::INSERT_SUBVECTOR,
{MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32}, Custom);
// Deal with vec5/6/7 vector operations when widened to vec8.
setOperationAction(ISD::INSERT_SUBVECTOR,
{MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32,
MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32},
Custom);
// BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling,
// and output demarshalling
setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i32, MVT::i64}, Custom);
// We can't return success/failure, only the old value;
// let LLVM add the comparison.
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, {MVT::i32, MVT::i64},
Expand);
if (Subtarget->hasFlatAddressSpace())
setOperationAction(ISD::ADDRSPACECAST, {MVT::i32, MVT::i64}, Custom);
setOperationAction(ISD::BITREVERSE, {MVT::i32, MVT::i64}, Legal);
// FIXME: This should be narrowed to i32, but that only happens if i64 is
// illegal.
// FIXME: Should lower sub-i32 bswaps to bit-ops without v_perm_b32.
setOperationAction(ISD::BSWAP, {MVT::i64, MVT::i32}, Legal);
// This is s_memtime on SI and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction({ISD::FPOW, ISD::FPOWI}, MVT::f16, Promote);
setOperationAction({ISD::FLOG, ISD::FEXP, ISD::FLOG10}, MVT::f16, Custom);
}
if (Subtarget->hasMadMacF32Insts())
setOperationAction(ISD::FMAD, MVT::f32, Legal);
if (!Subtarget->hasBFI())
// fcopysign can be done in a single instruction with BFI.
setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
if (!Subtarget->hasBCNT(32))
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
if (Subtarget->hasFFBH())
setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
if (Subtarget->hasFFBL())
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
// On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
// effort to match them now. We want this to be false for i64 cases when the
// extraction isn't restricted to the upper or lower half. Ideally we would
// have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
// span the midpoint are probably relatively rare, so don't worry about them
// for now.
if (Subtarget->hasBFE())
setHasExtractBitsInsn(true);
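// Illustrative sketch (not part of the patch) of what a 32-bit BFE extracts,
// assuming 1 <= Width, Width < 32, and Offset + Width <= 32:
auto SketchBFEU32 = [](uint32_t Src, unsigned Offset, unsigned Width) {
return (Src >> Offset) & ((1u << Width) - 1u); // Width bits from Offset
};
(void)SketchBFEU32; // illustration only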
// Clamp modifier on add/sub
if (Subtarget->hasIntClamp())
setOperationAction({ISD::UADDSAT, ISD::USUBSAT}, MVT::i32, Legal);
if (Subtarget->hasAddNoCarry())
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT}, {MVT::i16, MVT::i32},
Legal);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, {MVT::f32, MVT::f64},
Custom);
// These are really only legal for ieee_mode functions. We should be avoiding
// them for functions that don't have ieee_mode enabled, so just say they are
// legal.
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
{MVT::f32, MVT::f64}, Legal);
if (Subtarget->haveRoundOpsF64())
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FRINT}, MVT::f64, Legal);
else
setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FRINT, ISD::FFLOOR},
MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction({ISD::Constant, ISD::SMIN, ISD::SMAX, ISD::UMIN,
ISD::UMAX, ISD::UADDSAT, ISD::USUBSAT},
MVT::i16, Legal);
AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
setOperationAction({ISD::ROTR, ISD::ROTL, ISD::SELECT_CC, ISD::BR_CC},
MVT::i16, Expand);
setOperationAction({ISD::SIGN_EXTEND, ISD::SDIV, ISD::UDIV, ISD::SREM,
ISD::UREM, ISD::BITREVERSE, ISD::CTTZ,
ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
ISD::CTPOP},
MVT::i16, Promote);
setOperationAction(ISD::LOAD, MVT::i16, Custom);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::i16, Promote);
AddPromotedToType(ISD::FP16_TO_FP, MVT::i16, MVT::i32);
setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote);
AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, MVT::i16, Custom);
// F16 - Constant Actions.
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
// F16 - Load/Store Actions.
setOperationAction(ISD::LOAD, MVT::f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::f16, MVT::i16);
setOperationAction(ISD::STORE, MVT::f16, Promote);
AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16);
// F16 - VOP1 Actions.
setOperationAction(
{ISD::FP_ROUND, ISD::FCOS, ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
MVT::f16, Custom);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom);
setOperationAction(
{ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::SINT_TO_FP, ISD::UINT_TO_FP},
MVT::f16, Promote);
// F16 - VOP2 Actions.
setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand);
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
setOperationAction(ISD::FMA, MVT::f16, Legal);
if (STI.hasMadF16())
setOperationAction(ISD::FMAD, MVT::f16, Legal);
for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch (Op) {
case ISD::LOAD:
case ISD::STORE:
case ISD::BUILD_VECTOR:
case ISD::BITCAST:
+ case ISD::UNDEF:
case ISD::EXTRACT_VECTOR_ELT:
case ISD::INSERT_VECTOR_ELT:
case ISD::INSERT_SUBVECTOR:
case ISD::EXTRACT_SUBVECTOR:
case ISD::SCALAR_TO_VECTOR:
break;
case ISD::CONCAT_VECTORS:
setOperationAction(Op, VT, Custom);
break;
default:
setOperationAction(Op, VT, Expand);
break;
}
}
}
// v_perm_b32 can handle either of these.
setOperationAction(ISD::BSWAP, {MVT::i16, MVT::v2i16}, Legal);
setOperationAction(ISD::BSWAP, MVT::v4i16, Custom);
// XXX - Do these do anything? Vector constants turn into build_vector.
setOperationAction(ISD::Constant, {MVT::v2i16, MVT::v2f16}, Legal);
setOperationAction(ISD::UNDEF, {MVT::v2i16, MVT::v2f16}, Legal);
setOperationAction(ISD::STORE, MVT::v2i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
setOperationAction(ISD::STORE, MVT::v2f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f16, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v2i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2i16, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v2f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f16, MVT::i32);
setOperationAction(ISD::AND, MVT::v2i16, Promote);
AddPromotedToType(ISD::AND, MVT::v2i16, MVT::i32);
setOperationAction(ISD::OR, MVT::v2i16, Promote);
AddPromotedToType(ISD::OR, MVT::v2i16, MVT::i32);
setOperationAction(ISD::XOR, MVT::v2i16, Promote);
AddPromotedToType(ISD::XOR, MVT::v2i16, MVT::i32);
setOperationAction(ISD::LOAD, MVT::v4i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4i16, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v4f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f16, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v4i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32);
setOperationAction(ISD::STORE, MVT::v4f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
setOperationAction(ISD::LOAD, MVT::v8i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8i16, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v8f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v8f16, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v8i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v8i16, MVT::v4i32);
setOperationAction(ISD::STORE, MVT::v8f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32);
setOperationAction(ISD::LOAD, MVT::v16i16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16i16, MVT::v8i32);
setOperationAction(ISD::LOAD, MVT::v16f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f16, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v16i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v16i16, MVT::v8i32);
setOperationAction(ISD::STORE, MVT::v16f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f16, MVT::v8i32);
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v2i32, Expand);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Expand);
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v4i32, Expand);
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v8i32, Expand);
if (!Subtarget->hasVOP3PInsts())
setOperationAction(ISD::BUILD_VECTOR, {MVT::v2i16, MVT::v2f16}, Custom);
setOperationAction(ISD::FNEG, MVT::v2f16, Legal);
// This isn't really legal, but this avoids the legalizer unrolling it (and
// allows matching fneg (fabs x) patterns)
setOperationAction(ISD::FABS, MVT::v2f16, Legal);
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, MVT::f16, Custom);
setOperationAction({ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE}, MVT::f16, Legal);
setOperationAction({ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE},
{MVT::v4f16, MVT::v8f16, MVT::v16f16}, Custom);
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM},
{MVT::v4f16, MVT::v8f16, MVT::v16f16}, Expand);
for (MVT Vec16 : {MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16}) {
setOperationAction(
{ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR},
Vec16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
}
}
if (Subtarget->hasVOP3PInsts()) {
setOperationAction({ISD::ADD, ISD::SUB, ISD::MUL, ISD::SHL, ISD::SRL,
ISD::SRA, ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX,
ISD::UADDSAT, ISD::USUBSAT, ISD::SADDSAT, ISD::SSUBSAT},
MVT::v2i16, Legal);
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FMINNUM_IEEE,
ISD::FMAXNUM_IEEE, ISD::FCANONICALIZE},
MVT::v2f16, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, {MVT::v2i16, MVT::v2f16},
Custom);
setOperationAction(ISD::VECTOR_SHUFFLE,
{MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
MVT::v16f16, MVT::v16i16},
Custom);
for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
// Split vector operations.
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL, ISD::ADD, ISD::SUB,
ISD::MUL, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX,
ISD::UADDSAT, ISD::SADDSAT, ISD::USUBSAT,
ISD::SSUBSAT},
VT, Custom);
for (MVT VT : {MVT::v4f16, MVT::v8f16, MVT::v16f16})
// Split vector operations.
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FCANONICALIZE},
VT, Custom);
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, {MVT::v2f16, MVT::v4f16},
Custom);
setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
setOperationAction(ISD::SELECT, {MVT::v4i16, MVT::v4f16}, Custom);
if (Subtarget->hasPackedFP32Ops()) {
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FNEG},
MVT::v2f32, Legal);
setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA},
{MVT::v4f32, MVT::v8f32, MVT::v16f32, MVT::v32f32},
Custom);
}
}
setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v4f16, Custom);
if (Subtarget->has16BitInsts()) {
setOperationAction(ISD::SELECT, MVT::v2i16, Promote);
AddPromotedToType(ISD::SELECT, MVT::v2i16, MVT::i32);
setOperationAction(ISD::SELECT, MVT::v2f16, Promote);
AddPromotedToType(ISD::SELECT, MVT::v2f16, MVT::i32);
} else {
// Legalization hack.
setOperationAction(ISD::SELECT, {MVT::v2i16, MVT::v2f16}, Custom);
setOperationAction({ISD::FNEG, ISD::FABS}, MVT::v2f16, Custom);
}
setOperationAction(ISD::SELECT,
{MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v8i16, MVT::v8f16, MVT::v16i16, MVT::v16f16},
Custom);
setOperationAction({ISD::SMULO, ISD::UMULO}, MVT::i64, Custom);
if (Subtarget->hasMad64_32())
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, MVT::i32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
{MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
MVT::v2i16, MVT::v2f16},
Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN,
{MVT::v2f16, MVT::v2i16, MVT::v3f16, MVT::v3i16,
MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::Other, MVT::f16,
MVT::i16, MVT::i8},
Custom);
setOperationAction(ISD::INTRINSIC_VOID,
{MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v3i16,
MVT::v3f16, MVT::v4f16, MVT::v4i16, MVT::f16, MVT::i16,
MVT::i8},
Custom);
setTargetDAGCombine({ISD::ADD,
ISD::ADDCARRY,
ISD::SUB,
ISD::SUBCARRY,
ISD::FADD,
ISD::FSUB,
ISD::FMINNUM,
ISD::FMAXNUM,
ISD::FMINNUM_IEEE,
ISD::FMAXNUM_IEEE,
ISD::FMA,
ISD::SMIN,
ISD::SMAX,
ISD::UMIN,
ISD::UMAX,
ISD::SETCC,
ISD::AND,
ISD::OR,
ISD::XOR,
ISD::SINT_TO_FP,
ISD::UINT_TO_FP,
ISD::FCANONICALIZE,
ISD::SCALAR_TO_VECTOR,
ISD::ZERO_EXTEND,
ISD::SIGN_EXTEND_INREG,
ISD::EXTRACT_VECTOR_ELT,
ISD::INSERT_VECTOR_ELT});
// All memory operations. Some folding on the pointer operand is done to help
// match the constant offsets in the addressing modes.
setTargetDAGCombine({ISD::LOAD,
ISD::STORE,
ISD::ATOMIC_LOAD,
ISD::ATOMIC_STORE,
ISD::ATOMIC_CMP_SWAP,
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
ISD::ATOMIC_SWAP,
ISD::ATOMIC_LOAD_ADD,
ISD::ATOMIC_LOAD_SUB,
ISD::ATOMIC_LOAD_AND,
ISD::ATOMIC_LOAD_OR,
ISD::ATOMIC_LOAD_XOR,
ISD::ATOMIC_LOAD_NAND,
ISD::ATOMIC_LOAD_MIN,
ISD::ATOMIC_LOAD_MAX,
ISD::ATOMIC_LOAD_UMIN,
ISD::ATOMIC_LOAD_UMAX,
ISD::ATOMIC_LOAD_FADD,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_W_CHAIN});
// FIXME: In other contexts we pretend this is a per-function property.
setStackPointerRegisterToSaveRestore(AMDGPU::SGPR32);
setSchedulingPreference(Sched::RegPressure);
}
const GCNSubtarget *SITargetLowering::getSubtarget() const {
return Subtarget;
}
//===----------------------------------------------------------------------===//
// TargetLowering queries
//===----------------------------------------------------------------------===//
// v_mad_mix* support a conversion from f16 to f32.
//
// There is only one special case, when denormals are enabled, that we don't
// currently handle where this would still be OK to use.
bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
DestVT.getScalarType() == MVT::f32 &&
SrcVT.getScalarType() == MVT::f16 &&
// TODO: This probably only requires no input flushing?
!hasFP32Denormals(DAG.getMachineFunction());
}
bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
LLT DestTy, LLT SrcTy) const {
return ((Opcode == TargetOpcode::G_FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == TargetOpcode::G_FMA && Subtarget->hasFmaMixInsts())) &&
DestTy.getScalarSizeInBits() == 32 &&
SrcTy.getScalarSizeInBits() == 16 &&
// TODO: This probably only requires no input flushing?
!hasFP32Denormals(*MI.getMF());
}
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
// SI has some legal vector types, but no legal vector operations. Say no
// shuffles are legal in order to prefer scalarizing some vector operations.
return false;
}
MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (CC == CallingConv::AMDGPU_KERNEL)
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
if (VT.isVector()) {
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
if (Size == 16) {
if (Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
return VT.isInteger() ? MVT::i32 : MVT::f32;
}
if (Size < 16)
return Subtarget->has16BitInsts() ? MVT::i16 : MVT::i32;
return Size == 32 ? ScalarVT.getSimpleVT() : MVT::i32;
}
if (VT.getSizeInBits() > 32)
return MVT::i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
if (CC == CallingConv::AMDGPU_KERNEL)
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
// FIXME: Should probably promote 8-bit vectors to i16.
if (Size == 16 && Subtarget->has16BitInsts())
return (NumElts + 1) / 2;
if (Size <= 32)
return NumElts;
if (Size > 32)
return NumElts * ((Size + 31) / 32);
} else if (VT.getSizeInBits() > 32)
return (VT.getSizeInBits() + 31) / 32;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
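// Standalone sketch (not part of the patch) of the wide-scalar arithmetic
// above: every scalar wider than 32 bits is split into 32-bit pieces, one
// register per piece. The helper name is hypothetical.
static unsigned sketchRegsForWideVector(unsigned NumElts, unsigned ScalarBits) {
unsigned PiecesPerElt = (ScalarBits + 31) / 32; // round up to dwords
return NumElts * PiecesPerElt;                  // e.g. v4i64 -> 4 * 2 = 8
}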
unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
LLVMContext &Context, CallingConv::ID CC,
EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
// FIXME: We should fix the ABI to be the same on targets without 16-bit
// support, but unless we can properly handle 3-vectors, it will still be
// inconsistent.
if (Size == 16 && Subtarget->has16BitInsts()) {
RegisterVT = VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
IntermediateVT = RegisterVT;
NumIntermediates = (NumElts + 1) / 2;
return NumIntermediates;
}
if (Size == 32) {
RegisterVT = ScalarVT.getSimpleVT();
IntermediateVT = RegisterVT;
NumIntermediates = NumElts;
return NumIntermediates;
}
if (Size < 16 && Subtarget->has16BitInsts()) {
// FIXME: Should probably form v2i16 pieces
RegisterVT = MVT::i16;
IntermediateVT = ScalarVT;
NumIntermediates = NumElts;
return NumIntermediates;
}
if (Size != 16 && Size <= 32) {
RegisterVT = MVT::i32;
IntermediateVT = ScalarVT;
NumIntermediates = NumElts;
return NumIntermediates;
}
if (Size > 32) {
RegisterVT = MVT::i32;
IntermediateVT = RegisterVT;
NumIntermediates = NumElts * ((Size + 31) / 32);
return NumIntermediates;
}
}
return TargetLowering::getVectorTypeBreakdownForCallingConv(
Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
}
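// Worked example (illustrative, not part of the patch): breaking down v5f16
// for a non-kernel call with 16-bit insts takes the Size == 16 path above,
// giving RegisterVT = v2f16 and (5 + 1) / 2 = 3 packed-half intermediates.
static unsigned sketchPackedHalfPieces(unsigned NumElts) {
return (NumElts + 1) / 2; // v5f16 -> 3 v2f16 registers
}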
static EVT memVTFromImageData(Type *Ty, unsigned DMaskLanes) {
assert(DMaskLanes != 0);
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
unsigned NumElts = std::min(DMaskLanes, VT->getNumElements());
return EVT::getVectorVT(Ty->getContext(),
EVT::getEVT(VT->getElementType()),
NumElts);
}
return EVT::getEVT(Ty);
}
// Peek through TFE struct returns to only use the data size.
static EVT memVTFromImageReturn(Type *Ty, unsigned DMaskLanes) {
auto *ST = dyn_cast<StructType>(Ty);
if (!ST)
return memVTFromImageData(Ty, DMaskLanes);
// Some intrinsics return an aggregate type - special-case these to work out
// the correct memVT.
//
// Only limited forms of aggregate type are currently expected.
if (ST->getNumContainedTypes() != 2 ||
!ST->getContainedType(1)->isIntegerTy(32))
return EVT();
return memVTFromImageData(ST->getContainedType(0), DMaskLanes);
}
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
unsigned IntrID) const {
Info.flags = MachineMemOperand::MONone;
if (CI.hasMetadata(LLVMContext::MD_invariant_load))
Info.flags |= MachineMemOperand::MOInvariant;
if (const AMDGPU::RsrcIntrinsic *RsrcIntr =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
(Intrinsic::ID)IntrID);
if (Attr.hasFnAttr(Attribute::ReadNone))
return false;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
if (RsrcIntr->IsImage) {
Info.ptrVal = MFI->getImagePSV(TM);
Info.align.reset();
} else {
Info.ptrVal = MFI->getBufferPSV(TM);
}
Info.flags |= MachineMemOperand::MODereferenceable;
if (Attr.hasFnAttr(Attribute::ReadOnly)) {
unsigned DMaskLanes = 4;
if (RsrcIntr->IsImage) {
const AMDGPU::ImageDimIntrinsicInfo *Intr
= AMDGPU::getImageDimIntrinsicInfo(IntrID);
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
if (!BaseOpcode->Gather4) {
// If this isn't a gather, we may have excess loaded elements in the
// IR type. Check the dmask for the real number of elements loaded.
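// For example (illustrative): dmask = 0b0101 selects two of the four
// channels, so DMaskLanes = 2 and the memVT below becomes a two-element
// vector.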
unsigned DMask
= cast<ConstantInt>(CI.getArgOperand(0))->getZExtValue();
DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
}
Info.memVT = memVTFromImageReturn(CI.getType(), DMaskLanes);
} else
Info.memVT = EVT::getEVT(CI.getType());
// FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.flags |= MachineMemOperand::MOLoad;
} else if (Attr.hasFnAttr(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
Type *DataTy = CI.getArgOperand(0)->getType();
if (RsrcIntr->IsImage) {
unsigned DMask = cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue();
unsigned DMaskLanes = DMask == 0 ? 1 : countPopulation(DMask);
Info.memVT = memVTFromImageData(DataTy, DMaskLanes);
} else
Info.memVT = EVT::getEVT(DataTy);
Info.flags |= MachineMemOperand::MOStore;
} else {
// Atomic
Info.opc = CI.getType()->isVoidTy() ? ISD::INTRINSIC_VOID :
ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType());
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable;
// XXX - Should this be volatile without known ordering?
Info.flags |= MachineMemOperand::MOVolatile;
switch (IntrID) {
default:
break;
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds: {
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
return true;
}
}
}
return true;
}
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(4));
if (!Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_buffer_atomic_fadd: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
Info.ptrVal = MFI->getBufferPSV(TM);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(4));
if (!Vol || !Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
const ConstantInt *Vol = cast<ConstantInt>(CI.getOperand(1));
if (!Vol->isZero())
Info.flags |= MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_global_atomic_csub: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
Info.ptrVal = MFI->getImagePSV(TM);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
return true;
}
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType());
Info.ptrVal = CI.getOperand(0);
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MOStore |
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOVolatile;
return true;
}
case Intrinsic::amdgcn_ds_gws_init:
case Intrinsic::amdgcn_ds_gws_barrier:
case Intrinsic::amdgcn_ds_gws_sema_v:
case Intrinsic::amdgcn_ds_gws_sema_br:
case Intrinsic::amdgcn_ds_gws_sema_p:
case Intrinsic::amdgcn_ds_gws_sema_release_all: {
Info.opc = ISD::INTRINSIC_VOID;
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Info.ptrVal = MFI->getGWSPSV(TM);
// This is an abstract access, but we need to specify a type and size.
Info.memVT = MVT::i32;
Info.size = 4;
Info.align = Align(4);
if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
Info.flags |= MachineMemOperand::MOLoad;
else
Info.flags |= MachineMemOperand::MOStore;
return true;
}
case Intrinsic::amdgcn_global_load_lds: {
Info.opc = ISD::INTRINSIC_VOID;
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
}
default:
return false;
}
}
bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
SmallVectorImpl<Value*> &Ops,
Type *&AccessTy) const {
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_global_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_flat_atomic_fadd_v2bf16:
case Intrinsic::amdgcn_global_atomic_csub: {
Value *Ptr = II->getArgOperand(0);
AccessTy = II->getType();
Ops.push_back(Ptr);
return true;
}
default:
return false;
}
}
bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
if (!Subtarget->hasFlatInstOffsets()) {
// Flat instructions do not have offsets, and only have the register
// address.
return AM.BaseOffs == 0 && AM.Scale == 0;
}
return AM.Scale == 0 &&
(AM.BaseOffs == 0 ||
Subtarget->getInstrInfo()->isLegalFLATOffset(
AM.BaseOffs, AMDGPUAS::FLAT_ADDRESS, SIInstrFlags::FLAT));
}
bool SITargetLowering::isLegalGlobalAddressingMode(const AddrMode &AM) const {
if (Subtarget->hasFlatGlobalInsts())
return AM.Scale == 0 &&
(AM.BaseOffs == 0 || Subtarget->getInstrInfo()->isLegalFLATOffset(
AM.BaseOffs, AMDGPUAS::GLOBAL_ADDRESS,
SIInstrFlags::FlatGlobal));
if (!Subtarget->hasAddr64() || Subtarget->useFlatForGlobal()) {
// Assume that we will use FLAT for all global memory accesses
// on VI.
// FIXME: This assumption is currently wrong. On VI we still use
// MUBUF instructions for the r + i addressing mode. As currently
// implemented, the MUBUF instructions only work on buffers < 4GB.
// It may be possible to support > 4GB buffers with MUBUF instructions,
// by setting the stride value in the resource descriptor which would
// increase the size limit to (stride * 4GB). However, this is risky,
// because it has never been validated.
return isLegalFlatAddressingMode(AM);
}
return isLegalMUBUFAddressingMode(AM);
}
bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
// MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and
// additionally can do r + r + i with addr64. 32-bit has more addressing
// mode options. Depending on the resource constant, it can also do
// (i64 r0) + (i32 r1) * (i14 i).
//
// Private arrays end up using a scratch buffer most of the time, so also
// assume those use MUBUF instructions. Scratch loads / stores are currently
// implemented as mubuf instructions with the offen bit set, so they are
// slightly different from the normal addr64.
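// For example (illustrative): base + 4095 fits the 12-bit immediate offset
// checked below, while base + 4096 does not; 2 * r + r is rejected in the
// switch over AM.Scale.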
if (!SIInstrInfo::isLegalMUBUFImmOffset(AM.BaseOffs))
return false;
// FIXME: Since we can split immediate into soffset and immediate offset,
// would it make sense to allow any immediate?
switch (AM.Scale) {
case 0: // r + i or just i, depending on HasBaseReg.
return true;
case 1:
return true; // We have r + r or r + i.
case 2:
if (AM.HasBaseReg) {
// Reject 2 * r + r.
return false;
}
// Allow 2 * r as r + r
// Or 2 * r + i is allowed as r + r + i.
return true;
default: // Don't allow n * r
return false;
}
}
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS, Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
if (AS == AMDGPUAS::GLOBAL_ADDRESS)
return isLegalGlobalAddressingMode(AM);
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::BUFFER_FAT_POINTER) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
if (AM.BaseOffs % 4 != 0)
return isLegalMUBUFAddressingMode(AM);
// There are no SMRD extloads, so if we have to do a small type access we
// will use a MUBUF load.
// FIXME?: We also need to do this if unaligned, but we don't know the
// alignment here.
if (Ty->isSized() && DL.getTypeStoreSize(Ty) < 4)
return isLegalGlobalAddressingMode(AM);
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) {
// SMRD instructions have an 8-bit, dword offset on SI.
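// For example (illustrative): BaseOffs = 1020 encodes as 1020 / 4 = 255,
// the largest offset this check accepts.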
if (!isUInt<8>(AM.BaseOffs / 4))
return false;
} else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) {
// On CI+, this can also be a 32-bit literal constant offset. If it fits
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
} else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
} else
llvm_unreachable("unhandled generation");
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
if (AM.Scale == 1 && AM.HasBaseReg)
return true;
return false;
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
return isLegalMUBUFAddressingMode(AM);
} else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
AS == AMDGPUAS::REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
// an 8-bit dword offset but we don't know the alignment here.
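// For example (illustrative): BaseOffs = 65535 is accepted here, while
// 65536 is not.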
if (!isUInt<16>(AM.BaseOffs))
return false;
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
if (AM.Scale == 1 && AM.HasBaseReg)
return true;
return false;
} else if (AS == AMDGPUAS::FLAT_ADDRESS ||
AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
// addressing modes, so treat them as having no offset like flat
// instructions.
return isLegalFlatAddressingMode(AM);
}
// Assume a user alias of global for unknown address spaces.
return isLegalGlobalAddressingMode(AM);
}
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
const MachineFunction &MF) const {
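// Illustrative summary of the limits checked below: 4 dwords (128 bits)
// for global/flat, the subtarget's maximum private element size for
// scratch, and 2 dwords (64 bits) for LDS/region.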
if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize();
return (MemVT.getSizeInBits() <= MaxPrivateBits);
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
return (MemVT.getSizeInBits() <= 2 * 32);
}
return true;
}
bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
MachineMemOperand::Flags Flags, bool *IsFast) const {
if (IsFast)
*IsFast = false;
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
// Check if alignment requirements for ds_read/write instructions are
// disabled.
if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
return false;
Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
Alignment < RequiredAlignment)
return false;
// Either the alignment requirements are "enabled", or there is an
// unaligned-LDS-access-related hardware bug even though alignment
// requirements are "disabled". In either case, we need to check for proper
// alignment requirements.
//
switch (Size) {
case 64:
// SI has a hardware bug in the LDS / GDS bounds checking: if the base
// address is negative, then the instruction is incorrectly treated as
// out-of-bounds even if base + offsets is in bounds. Split vectorized
// loads here to avoid emitting ds_read2_b32. We may re-combine the
// load later in the SILoadStoreOptimizer.
if (!Subtarget->hasUsableDSOffset() && Alignment < Align(8))
return false;
// 8-byte accesses via ds_read/write_b64 require 8-byte alignment, but we
// can do a 4-byte aligned, 8-byte access in a single operation using
// ds_read2/write2_b32 with adjacent offsets.
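// For example (illustrative sketch): an 8-byte LDS access at a 4-byte
// aligned address N can be selected as ds_read2_b32 with dword offsets
// N/4 and N/4 + 1.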
RequiredAlignment = Align(4);
if (Subtarget->hasUnalignedDSAccessEnabled()) {
// We will either select ds_read_b64/ds_write_b64 or ds_read2_b32/
// ds_write2_b32 depending on the alignment. In either case with either
// alignment there is no faster way of doing this.
if (IsFast)
*IsFast = true;
return true;
}
break;
case 96:
if (!Subtarget->hasDS96AndDS128())
return false;
// 12-byte accesses via ds_read/write_b96 require 16-byte alignment on
// gfx8 and older.
if (Subtarget->hasUnalignedDSAccessEnabled()) {
// Naturally aligned access is fastest. However, also report it as Fast
// if memory is aligned to less than a DWORD. A narrow load or store will
// be just as slow as a single ds_read_b96/ds_write_b96, but there will
// be more of them, so overall we pay less of a penalty by issuing a
// single instruction.
if (IsFast)
*IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
return true;
}
break;
case 128:
if (!Subtarget->hasDS96AndDS128() || !Subtarget->useDS128())
return false;
// 16-byte accesses via ds_read/write_b128 require 16-byte alignment on
// gfx8 and older, but we can do an 8-byte aligned, 16-byte access in a
// single operation using ds_read2/write2_b64.
RequiredAlignment = Align(8);
if (Subtarget->hasUnalignedDSAccessEnabled()) {
// Naturally aligned access is fastest. However, also report it as Fast
// if memory is aligned to less than a DWORD. A narrow load or store will
// be just as slow as a single ds_read_b128/ds_write_b128, but there
// will be more of them, so overall we pay less of a penalty by issuing a
// single instruction.
if (IsFast)
*IsFast = Alignment >= RequiredAlignment || Alignment < Align(4);
return true;
}
break;
default:
if (Size > 32)
return false;
break;
}
if (IsFast)
*IsFast = Alignment >= RequiredAlignment;
return Alignment >= RequiredAlignment ||
Subtarget->hasUnalignedDSAccessEnabled();
}
if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
return AlignedBy4 ||
Subtarget->enableFlatScratch() ||
Subtarget->hasUnalignedScratchAccess();
}
// FIXME: We have to be conservative here and assume that flat operations
// will access scratch. If we had access to the IR function, then we
// could determine if any private memory was used in the function.
if (AddrSpace == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasUnalignedScratchAccess()) {
bool AlignedBy4 = Alignment >= Align(4);
if (IsFast)
*IsFast = AlignedBy4;
return AlignedBy4;
}
if (Subtarget->hasUnalignedBufferAccessEnabled()) {
// If we have a uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
if (IsFast) {
// Accesses can really be issued as 1-byte aligned or 4-byte aligned, so
// 2-byte alignment is worse than 1 unless doing a 2-byte access.
*IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
Alignment >= Align(4) : Alignment != Align(2);
}
return true;
}
// Values smaller than a dword must be aligned.
if (Size < 32)
return false;
// 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
// byte-address are ignored, thus forcing Dword alignment.
// This applies to private, global, and constant memory.
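// For example (illustrative): a dword access at address 0x1002 is treated
// by the hardware as an access at 0x1000.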
if (IsFast)
*IsFast = true;
return Size >= 32 && Alignment >= Align(4);
}
bool SITargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *IsFast) const {
bool Allow = allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AddrSpace,
Alignment, Flags, IsFast);
if (Allow && IsFast && Subtarget->hasUnalignedDSAccessEnabled() &&
(AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS)) {
// Lie that it is fast if +unaligned-access-mode is passed so that DS
// accesses get vectorized. We can use ds_read2_b*/ds_write2_b* instructions
// on misaligned data, which is faster than a pair of
// ds_read_b*/ds_write_b* which would be equally misaligned.
// This is only used by the common passes, selection always calls the
// allowsMisalignedMemoryAccessesImpl version.
*IsFast = true;
}
return Allow;
}
EVT SITargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
// FIXME: Should account for address space here.
// The default fallback uses the private pointer size as a guess for a type to
// use. Make sure we switch these to 64-bit accesses.
if (Op.size() >= 16 &&
Op.isDstAligned(Align(4))) // XXX: Should only do for global
return MVT::v4i32;
if (Op.size() >= 8 && Op.isDstAligned(Align(4)))
return MVT::v2i32;
// Use the default.
return MVT::Other;
}
bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
return MemNode->getMemOperand()->getFlags() & MONoClobber;
}
bool SITargetLowering::isNonGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS ||
AS == AMDGPUAS::PRIVATE_ADDRESS;
}
bool SITargetLowering::isFreeAddrSpaceCast(unsigned SrcAS,
unsigned DestAS) const {
// Flat -> private/local is a simple truncate.
// Flat -> global is no-op
if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
return true;
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
return TM.isNoopAddrSpaceCast(SrcAS, DestAS);
}
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
return AMDGPUInstrInfo::isUniformMMO(MemNode->getMemOperand());
}
TargetLoweringBase::LegalizeTypeAction
SITargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getScalarType().bitsLE(MVT::i16))
return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
// FIXME: Could be smarter if called for vector constants.
return true;
}
bool SITargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
unsigned Index) const {
if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
return false;
// TODO: Add more cases that are cheap.
return Index == 0;
}
bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
if (Subtarget->has16BitInsts() && VT == MVT::i16) {
switch (Op) {
case ISD::LOAD:
case ISD::STORE:
// These operations are done with 32-bit instructions anyway.
case ISD::AND:
case ISD::OR:
case ISD::XOR:
case ISD::SELECT:
// TODO: Extensions?
return true;
default:
return false;
}
}
// SimplifySetCC uses this function to determine whether or not it should
// create setcc with i1 operands. We don't have instructions for i1 setcc.
if (VT == MVT::i1 && Op == ISD::SETCC)
return false;
return TargetLowering::isTypeDesirableForOp(Op, VT);
}
SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Chain,
uint64_t Offset) const {
const DataLayout &DL = DAG.getDataLayout();
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const ArgDescriptor *InputPtrReg;
const TargetRegisterClass *RC;
LLT ArgTy;
MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
std::tie(InputPtrReg, RC, ArgTy) =
Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
// We may not have the kernarg segment argument if we have no kernel
// arguments.
if (!InputPtrReg)
return DAG.getConstant(0, SL, PtrVT);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
return DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Offset));
}
SDValue SITargetLowering::getImplicitArgPtr(SelectionDAG &DAG,
const SDLoc &SL) const {
uint64_t Offset = getImplicitParameterOffset(DAG.getMachineFunction(),
FIRST_IMPLICIT);
return lowerKernArgParameterPtr(DAG, SL, DAG.getEntryNode(), Offset);
}
SDValue SITargetLowering::getLDSKernelId(SelectionDAG &DAG,
const SDLoc &SL) const {
Function &F = DAG.getMachineFunction().getFunction();
Optional<uint32_t> KnownSize =
AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
if (KnownSize.has_value())
return DAG.getConstant(KnownSize.value(), SL, MVT::i32);
return SDValue();
}
SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Val,
bool Signed,
const ISD::InputArg *Arg) const {
// First, if it is a widened vector, narrow it.
if (VT.isVector() &&
VT.getVectorNumElements() != MemVT.getVectorNumElements()) {
EVT NarrowedVT =
EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(),
VT.getVectorNumElements());
Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, NarrowedVT, Val,
DAG.getConstant(0, SL, MVT::i32));
}
// Then convert the vector elements or scalar value.
if (Arg && (Arg->Flags.isSExt() || Arg->Flags.isZExt()) &&
VT.bitsLT(MemVT)) {
unsigned Opc = Arg->Flags.isZExt() ? ISD::AssertZext : ISD::AssertSext;
Val = DAG.getNode(Opc, SL, MemVT, Val, DAG.getValueType(VT));
}
if (MemVT.isFloatingPoint())
Val = getFPExtOrFPRound(DAG, Val, SL, VT);
else if (Signed)
Val = DAG.getSExtOrTrunc(Val, SL, VT);
else
Val = DAG.getZExtOrTrunc(Val, SL, VT);
return Val;
}
SDValue SITargetLowering::lowerKernargMemParameter(
SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain,
uint64_t Offset, Align Alignment, bool Signed,
const ISD::InputArg *Arg) const {
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
// Try to avoid using an extload by loading earlier than the argument address,
// and extracting the relevant bits. The load should hopefully be merged with
// the previous argument.
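// For example (illustrative): a 2-byte argument at Offset = 6 gives
// AlignDownOffset = 4 and OffsetDiff = 2, so we load the aligned i32 at
// offset 4 and shift right by 2 * 8 = 16 bits to extract the value.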
if (MemVT.getStoreSize() < 4 && Alignment < 4) {
// TODO: Handle align < 4 and size >= 4 (can happen with packed structs).
int64_t AlignDownOffset = alignDown(Offset, 4);
int64_t OffsetDiff = Offset - AlignDownOffset;
EVT IntVT = MemVT.changeTypeToInteger();
// TODO: If we passed in the base kernel offset we could have a better
// alignment than 4, but we don't really need it.
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, AlignDownOffset);
SDValue Load = DAG.getLoad(MVT::i32, SL, Chain, Ptr, PtrInfo, Align(4),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue ShiftAmt = DAG.getConstant(OffsetDiff * 8, SL, MVT::i32);
SDValue Extract = DAG.getNode(ISD::SRL, SL, MVT::i32, Load, ShiftAmt);
SDValue ArgVal = DAG.getNode(ISD::TRUNCATE, SL, IntVT, Extract);
ArgVal = DAG.getNode(ISD::BITCAST, SL, MemVT, ArgVal);
ArgVal = convertArgType(DAG, VT, MemVT, SL, ArgVal, Signed, Arg);
return DAG.getMergeValues({ ArgVal, Load.getValue(1) }, SL);
}
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
SDValue Val = convertArgType(DAG, VT, MemVT, SL, Load, Signed, Arg);
return DAG.getMergeValues({ Val, Load.getValue(1) }, SL);
}
SDValue SITargetLowering::lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
const SDLoc &SL, SDValue Chain,
const ISD::InputArg &Arg) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
if (Arg.Flags.isByVal()) {
unsigned Size = Arg.Flags.getByValSize();
int FrameIdx = MFI.CreateFixedObject(Size, VA.getLocMemOffset(), false);
return DAG.getFrameIndex(FrameIdx, MVT::i32);
}
unsigned ArgOffset = VA.getLocMemOffset();
unsigned ArgSize = VA.getValVT().getStoreSize();
int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
MVT MemVT = VA.getValVT();
switch (VA.getLocInfo()) {
default:
break;
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
ExtType = ISD::SEXTLOAD;
break;
case CCValAssign::ZExt:
ExtType = ISD::ZEXTLOAD;
break;
case CCValAssign::AExt:
ExtType = ISD::EXTLOAD;
break;
}
ArgValue = DAG.getExtLoad(
ExtType, SL, VA.getLocVT(), Chain, FIN,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);
return ArgValue;
}
SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
const SIMachineFunctionInfo &MFI,
EVT VT,
AMDGPUFunctionArgInfo::PreloadedValue PVID) const {
const ArgDescriptor *Reg;
const TargetRegisterClass *RC;
LLT Ty;
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
if (!Reg) {
if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
// It's possible for a kernarg intrinsic call to appear in a kernel with
// no allocated segment, in which case we do not add the user sgpr
// argument, so just return null.
return DAG.getConstant(0, SDLoc(), VT);
}
// It's undefined behavior if a function marked with the amdgpu-no-*
// attributes uses the corresponding intrinsic.
return DAG.getUNDEF(VT);
}
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
static void processPSInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
CallingConv::ID CallConv,
ArrayRef<ISD::InputArg> Ins, BitVector &Skipped,
FunctionType *FType,
SIMachineFunctionInfo *Info) {
for (unsigned I = 0, E = Ins.size(), PSInputNum = 0; I != E; ++I) {
const ISD::InputArg *Arg = &Ins[I];
assert((!Arg->VT.isVector() || Arg->VT.getScalarSizeInBits() == 16) &&
"vector type argument should have been split");
// First check if it's a PS input addr.
if (CallConv == CallingConv::AMDGPU_PS &&
!Arg->Flags.isInReg() && PSInputNum <= 15) {
bool SkipArg = !Arg->Used && !Info->isPSInputAllocated(PSInputNum);
// Inconveniently only the first part of the split is marked as isSplit,
// so skip to the end. We only want to increment PSInputNum once for the
// entire split argument.
if (Arg->Flags.isSplit()) {
while (!Arg->Flags.isSplitEnd()) {
assert((!Arg->VT.isVector() ||
Arg->VT.getScalarSizeInBits() == 16) &&
"unexpected vector split in ps argument type");
if (!SkipArg)
Splits.push_back(*Arg);
Arg = &Ins[++I];
}
}
if (SkipArg) {
// We can safely skip PS inputs.
Skipped.set(Arg->getOrigArgIndex());
++PSInputNum;
continue;
}
Info->markPSInputAllocated(PSInputNum);
if (Arg->Used)
Info->markPSInputEnabled(PSInputNum);
++PSInputNum;
}
Splits.push_back(*Arg);
}
}
// Allocate special inputs passed in VGPRs.
void SITargetLowering::allocateSpecialEntryInputVGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Info.hasWorkItemIDX()) {
Register Reg = AMDGPU::VGPR0;
MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
unsigned Mask = (Subtarget->hasPackedTID() &&
Info.hasWorkItemIDY()) ? 0x3ff : ~0u;
Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg, Mask));
}
if (Info.hasWorkItemIDY()) {
assert(Info.hasWorkItemIDX());
if (Subtarget->hasPackedTID()) {
Info.setWorkItemIDY(ArgDescriptor::createRegister(AMDGPU::VGPR0,
0x3ff << 10));
} else {
unsigned Reg = AMDGPU::VGPR1;
MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
}
}
if (Info.hasWorkItemIDZ()) {
assert(Info.hasWorkItemIDX() && Info.hasWorkItemIDY());
if (Subtarget->hasPackedTID()) {
Info.setWorkItemIDZ(ArgDescriptor::createRegister(AMDGPU::VGPR0,
0x3ff << 20));
} else {
unsigned Reg = AMDGPU::VGPR2;
MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
CCInfo.AllocateReg(Reg);
Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
}
}
}
// Try to allocate a VGPR at the end of the argument list, or, if no argument
// VGPRs are left, allocate a stack slot.
// If \p Mask is given, it indicates the bitfield position in the register.
// If \p Arg is given, use it with the new \p Mask instead of allocating a new
// register.
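// For example (illustrative): with packed TIDs the X, Y and Z workitem IDs
// share one VGPR using the masks 0x3ff, 0x3ff << 10 and 0x3ff << 20 (see
// allocateSpecialInputVGPRsFixed below).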
static ArgDescriptor allocateVGPR32Input(CCState &CCInfo, unsigned Mask = ~0u,
ArgDescriptor Arg = ArgDescriptor()) {
if (Arg.isSet())
return ArgDescriptor::createArg(Arg, Mask);
ArrayRef<MCPhysReg> ArgVGPRs
= makeArrayRef(AMDGPU::VGPR_32RegClass.begin(), 32);
unsigned RegIdx = CCInfo.getFirstUnallocated(ArgVGPRs);
if (RegIdx == ArgVGPRs.size()) {
// Spill to stack required.
int64_t Offset = CCInfo.AllocateStack(4, Align(4));
return ArgDescriptor::createStack(Offset, Mask);
}
unsigned Reg = ArgVGPRs[RegIdx];
Reg = CCInfo.AllocateReg(Reg);
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
Register LiveInVReg = MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass);
MF.getRegInfo().setType(LiveInVReg, LLT::scalar(32));
return ArgDescriptor::createRegister(Reg, Mask);
}
static ArgDescriptor allocateSGPR32InputImpl(CCState &CCInfo,
const TargetRegisterClass *RC,
unsigned NumArgRegs) {
ArrayRef<MCPhysReg> ArgSGPRs = makeArrayRef(RC->begin(), 32);
unsigned RegIdx = CCInfo.getFirstUnallocated(ArgSGPRs);
if (RegIdx == ArgSGPRs.size())
report_fatal_error("ran out of SGPRs for arguments");
unsigned Reg = ArgSGPRs[RegIdx];
Reg = CCInfo.AllocateReg(Reg);
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
MF.addLiveIn(Reg, RC);
return ArgDescriptor::createRegister(Reg);
}
// If this has a fixed position, we should still allocate the register in the
// CCInfo state. Technically we could get away without this for values passed
// outside of the normal argument range.
static void allocateFixedSGPRInputImpl(CCState &CCInfo,
const TargetRegisterClass *RC,
MCRegister Reg) {
Reg = CCInfo.AllocateReg(Reg);
assert(Reg != AMDGPU::NoRegister);
MachineFunction &MF = CCInfo.getMachineFunction();
MF.addLiveIn(Reg, RC);
}
static void allocateSGPR32Input(CCState &CCInfo, ArgDescriptor &Arg) {
if (Arg) {
allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_32RegClass,
Arg.getRegister());
} else
Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_32RegClass, 32);
}
static void allocateSGPR64Input(CCState &CCInfo, ArgDescriptor &Arg) {
if (Arg) {
allocateFixedSGPRInputImpl(CCInfo, &AMDGPU::SGPR_64RegClass,
Arg.getRegister());
} else
Arg = allocateSGPR32InputImpl(CCInfo, &AMDGPU::SGPR_64RegClass, 16);
}
/// Allocate implicit function VGPR arguments at the end of allocated user
/// arguments.
void SITargetLowering::allocateSpecialInputVGPRs(
CCState &CCInfo, MachineFunction &MF,
const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
const unsigned Mask = 0x3ff;
ArgDescriptor Arg;
if (Info.hasWorkItemIDX()) {
Arg = allocateVGPR32Input(CCInfo, Mask);
Info.setWorkItemIDX(Arg);
}
if (Info.hasWorkItemIDY()) {
Arg = allocateVGPR32Input(CCInfo, Mask << 10, Arg);
Info.setWorkItemIDY(Arg);
}
if (Info.hasWorkItemIDZ())
Info.setWorkItemIDZ(allocateVGPR32Input(CCInfo, Mask << 20, Arg));
}
/// Allocate implicit function VGPR arguments in fixed registers.
void SITargetLowering::allocateSpecialInputVGPRsFixed(
CCState &CCInfo, MachineFunction &MF,
const SIRegisterInfo &TRI, SIMachineFunctionInfo &Info) const {
Register Reg = CCInfo.AllocateReg(AMDGPU::VGPR31);
if (!Reg)
report_fatal_error("failed to allocated VGPR for implicit arguments");
const unsigned Mask = 0x3ff;
Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg, Mask));
Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg, Mask << 10));
Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg, Mask << 20));
}
void SITargetLowering::allocateSpecialInputSGPRs(
CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
// TODO: Unify handling with private memory pointers.
if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
if (Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
if (Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
if (Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
if (Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
if (Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
if (Info.hasLDSKernelId())
allocateSGPR32Input(CCInfo, ArgInfo.LDSKernelId);
}
// Allocate special inputs passed in user SGPRs.
void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) const {
if (Info.hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info.addImplicitBufferPtr(TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(ImplicitBufferPtrReg);
}
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info.hasPrivateSegmentBuffer()) {
Register PrivateSegmentBufferReg = Info.addPrivateSegmentBuffer(TRI);
MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SGPR_128RegClass);
CCInfo.AllocateReg(PrivateSegmentBufferReg);
}
if (Info.hasDispatchPtr()) {
Register DispatchPtrReg = Info.addDispatchPtr(TRI);
MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
if (Info.hasKernargSegmentPtr()) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register InputPtrReg = Info.addKernargSegmentPtr(TRI);
CCInfo.AllocateReg(InputPtrReg);
Register VReg = MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
MRI.setType(VReg, LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
}
if (Info.hasDispatchID()) {
Register DispatchIDReg = Info.addDispatchID(TRI);
MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchIDReg);
}
if (Info.hasFlatScratchInit() && !getSubtarget()->isAmdPalOS()) {
Register FlatScratchInitReg = Info.addFlatScratchInit(TRI);
MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
if (Info.hasLDSKernelId()) {
Register Reg = Info.addLDSKernelId();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
// TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
// these from the dispatch pointer.
}
// Allocate special input registers that are initialized per-wave.
void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
MachineFunction &MF,
SIMachineFunctionInfo &Info,
CallingConv::ID CallConv,
bool IsShader) const {
if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
// Note: user SGPRs are handled by the front-end for graphics shaders
// Pad up the used user SGPRs with dead inputs.
unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
// Note we do not count the PrivateSegmentWaveByteOffset. We do not want to
// rely on it to reach 16, since if we end up with no stack usage it will
// not really be added.
unsigned NumRequiredSystemSGPRs = Info.hasWorkGroupIDX() +
Info.hasWorkGroupIDY() +
Info.hasWorkGroupIDZ() +
Info.hasWorkGroupInfo();
for (unsigned i = NumRequiredSystemSGPRs + CurrentUserSGPRs; i < 16; ++i) {
Register Reg = Info.addReservedUserSGPR();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
}
if (Info.hasWorkGroupIDX()) {
Register Reg = Info.addWorkGroupIDX();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDY()) {
Register Reg = Info.addWorkGroupIDY();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDZ()) {
Register Reg = Info.addWorkGroupIDZ();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupInfo()) {
Register Reg = Info.addWorkGroupInfo();
MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info.hasPrivateSegmentWaveByteOffset()) {
// Scratch wave offset passed in system SGPR.
unsigned PrivateSegmentWaveByteOffsetReg;
if (IsShader) {
PrivateSegmentWaveByteOffsetReg =
Info.getPrivateSegmentWaveByteOffsetSystemSGPR();
// This is true if the scratch wave byte offset doesn't have a fixed
// location.
if (PrivateSegmentWaveByteOffsetReg == AMDGPU::NoRegister) {
PrivateSegmentWaveByteOffsetReg = findFirstFreeSGPR(CCInfo);
Info.setPrivateSegmentWaveByteOffset(PrivateSegmentWaveByteOffsetReg);
}
} else
PrivateSegmentWaveByteOffsetReg = Info.addPrivateSegmentWaveByteOffset();
MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
assert(!Subtarget->hasUserSGPRInit16Bug() || IsShader ||
Info.getNumPreloadedSGPRs() >= 16);
}
static void reservePrivateMemoryRegs(const TargetMachine &TM,
MachineFunction &MF,
const SIRegisterInfo &TRI,
SIMachineFunctionInfo &Info) {
// Now that we've figured out where the scratch register inputs are, see if
// we should reserve the arguments and use them directly.
MachineFrameInfo &MFI = MF.getFrameInfo();
bool HasStackObjects = MFI.hasStackObjects();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
if (HasStackObjects)
Info.setHasNonSpillStackObjects(true);
// Everything live out of a block is spilled with fast regalloc, so it's
// almost certain that spilling will be required.
if (TM.getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
// For now assume stack access is needed in any callee functions, so we need
// the scratch registers to pass in.
bool RequiresStackAccess = HasStackObjects || MFI.hasCalls();
if (!ST.enableFlatScratch()) {
if (RequiresStackAccess && ST.isAmdHsaOrMesa(MF.getFunction())) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
// SGPR inputs. We can reserve those and use them directly.
Register PrivateSegmentBufferReg =
Info.getPreloadedReg(AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER);
Info.setScratchRSrcReg(PrivateSegmentBufferReg);
} else {
unsigned ReservedBufferReg = TRI.reservedPrivateSegmentBufferReg(MF);
// We tentatively reserve the last registers (skipping the last registers
// which may contain VCC, FLAT_SCR, and XNACK). After register allocation,
// we'll replace these with the ones immediately after those which were
// really allocated. In the prologue copies will be inserted from the
// argument to these reserved registers.
// Without HSA, relocations are used for the scratch pointer and the
// buffer resource setup is always inserted in the prologue. Scratch wave
// offset is still in an input SGPR.
Info.setScratchRSrcReg(ReservedBufferReg);
}
}
MachineRegisterInfo &MRI = MF.getRegInfo();
// For entry functions we have to set up the stack pointer if we use it,
// whereas non-entry functions get this "for free". This means there is no
// intrinsic advantage to using S32 over S34 in cases where we do not have
// calls but do need a frame pointer (i.e. if we are requested to have one
// because frame pointer elimination is disabled). To keep things simple we
// only ever use S32 as the call ABI stack pointer, and so using it does not
// imply we need a separate frame pointer.
//
// Try to use s32 as the SP, but move it if it would interfere with input
// arguments. This won't work with calls though.
//
// FIXME: Move SP to avoid any possible inputs, or find a way to spill input
// registers.
if (!MRI.isLiveIn(AMDGPU::SGPR32)) {
Info.setStackPtrOffsetReg(AMDGPU::SGPR32);
} else {
assert(AMDGPU::isShader(MF.getFunction().getCallingConv()));
if (MFI.hasCalls())
report_fatal_error("call in graphics shader with too many input SGPRs");
for (unsigned Reg : AMDGPU::SGPR_32RegClass) {
if (!MRI.isLiveIn(Reg)) {
Info.setStackPtrOffsetReg(Reg);
break;
}
}
if (Info.getStackPtrOffsetReg() == AMDGPU::SP_REG)
report_fatal_error("failed to find register for SP");
}
// hasFP should be accurate for entry functions even before the frame is
// finalized, because it does not rely on the known stack size, only
// properties like whether variable sized objects are present.
if (ST.getFrameLowering()->hasFP(MF)) {
Info.setFrameOffsetReg(AMDGPU::SGPR33);
}
}
bool SITargetLowering::supportSplitCSR(MachineFunction *MF) const {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
return !Info->isEntryFunction();
}
void SITargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
}
void SITargetLowering::insertCopiesSplitCSR(
MachineBasicBlock *Entry,
const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
if (!IStart)
return;
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
MachineBasicBlock::iterator MBBI = Entry->begin();
for (const MCPhysReg *I = IStart; *I; ++I) {
const TargetRegisterClass *RC = nullptr;
if (AMDGPU::SReg_64RegClass.contains(*I))
RC = &AMDGPU::SGPR_64RegClass;
else if (AMDGPU::SReg_32RegClass.contains(*I))
RC = &AMDGPU::SGPR_32RegClass;
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
Register NewVR = MRI->createVirtualRegister(RC);
// Create copy from CSR to a virtual register.
Entry->addLiveIn(*I);
BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
}
SDValue SITargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
const Function &Fn = MF.getFunction();
FunctionType *FType = MF.getFunction().getFunctionType();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
if (Subtarget->isAmdHsaOS() && AMDGPU::isGraphics(CallConv)) {
DiagnosticInfoUnsupported NoGraphicsHSA(
Fn, "unsupported non-compute shaders with HSA", DL.getDebugLoc());
DAG.getContext()->diagnose(NoGraphicsHSA);
return DAG.getEntryNode();
}
Info->allocateModuleLDSGlobal(Fn);
SmallVector<ISD::InputArg, 16> Splits;
SmallVector<CCValAssign, 16> ArgLocs;
BitVector Skipped(Ins.size());
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
bool IsGraphics = AMDGPU::isGraphics(CallConv);
bool IsKernel = AMDGPU::isKernel(CallConv);
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);
if (IsGraphics) {
assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
(!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
!Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
!Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
}
if (CallConv == CallingConv::AMDGPU_PS) {
processPSInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);
// At least one interpolation mode must be enabled or else the GPU will
// hang.
//
// Check PSInputAddr instead of PSInputEnable. The idea is that if the user
// set PSInputAddr, the user wants to enable some bits after the compilation
// based on run-time states. Since we can't know what the final PSInputEna
// will look like, we shouldn't do anything here; the user should take
// responsibility for the correct programming.
//
// Otherwise, the following restrictions apply:
// - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
// - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
// enabled too.
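// For example (illustrative): PSInputAddr = 0x800 (POS_W_FLOAT only) has
// no PERSP_*/LINEAR_* bit set, so the check below force-enables input 0
// and allocates VGPR0/VGPR1.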
if ((Info->getPSInputAddr() & 0x7F) == 0 ||
((Info->getPSInputAddr() & 0xF) == 0 && Info->isPSInputAllocated(11))) {
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
Info->markPSInputAllocated(0);
Info->markPSInputEnabled(0);
}
if (Subtarget->isAmdPalOS()) {
// For isAmdPalOS, the user does not enable some bits after compilation
// based on run-time states; the register values being generated here are
// the final ones set in hardware. Therefore we need to apply the
// workaround to PSInputAddr and PSInputEnable together. (The case where
// a bit is set in PSInputAddr but not PSInputEnable is where the
// frontend set up an input arg for a particular interpolation mode, but
// nothing uses that input arg. Really we should have an earlier pass
// that removes such an arg.)
unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
if ((PsInputBits & 0x7F) == 0 ||
((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
Info->markPSInputEnabled(
countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
}
} else if (IsKernel) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
Splits.append(Ins.begin(), Ins.end());
}
if (IsEntryFunc) {
allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
allocateHSAUserSGPRs(CCInfo, MF, *TRI, *Info);
} else if (!IsGraphics) {
// For the fixed ABI, pass workitem IDs in the last argument register.
allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}
if (IsKernel) {
analyzeFormalArgumentsCompute(CCInfo, Ins);
} else {
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, isVarArg);
CCInfo.AnalyzeFormalArguments(Splits, AssignFn);
}
SmallVector<SDValue, 16> Chains;
// FIXME: This is the minimum kernel argument alignment. We should improve
// this to the maximum alignment of the arguments.
//
// FIXME: Alignment of explicit arguments totally broken with non-0 explicit
// kern arg offset.
const Align KernelArgBaseAlign = Align(16);
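// For example (illustrative): an argument at byte offset 4 gets
// commonAlignment(16, 4) = Align(4), while one at offset 32 keeps
// Align(16).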
for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
if (Arg.isOrigArg() && Skipped[Arg.getOrigArgIndex()]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
continue;
}
CCValAssign &VA = ArgLocs[ArgIdx++];
MVT VT = VA.getLocVT();
if (IsEntryFunc && VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
const uint64_t Offset = VA.getLocMemOffset();
Align Alignment = commonAlignment(KernelArgBaseAlign, Offset);
if (Arg.Flags.isByRef()) {
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, Chain, Offset);
const GCNTargetMachine &TM =
static_cast<const GCNTargetMachine &>(getTargetMachine());
if (!TM.isNoopAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS,
Arg.Flags.getPointerAddrSpace())) {
Ptr = DAG.getAddrSpaceCast(DL, VT, Ptr, AMDGPUAS::CONSTANT_ADDRESS,
Arg.Flags.getPointerAddrSpace());
}
InVals.push_back(Ptr);
continue;
}
SDValue Arg = lowerKernargMemParameter(
DAG, VT, MemVT, DL, Chain, Offset, Alignment, Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
ParamTy && (ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
ParamTy->getAddressSpace() == AMDGPUAS::REGION_ADDRESS)) {
// On SI local pointers are just offsets into LDS, so they always
// fit in 16 bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
Arg = DAG.getNode(ISD::AssertZext, DL, Arg.getValueType(), Arg,
DAG.getValueType(MVT::i16));
}
InVals.push_back(Arg);
continue;
} else if (!IsEntryFunc && VA.isMemLoc()) {
SDValue Val = lowerStackParameter(DAG, VA, DL, Chain, Arg);
InVals.push_back(Val);
if (!Arg.Flags.isByVal())
Chains.push_back(Val.getValue(1));
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
Register Reg = VA.getLocReg();
const TargetRegisterClass *RC = nullptr;
if (AMDGPU::VGPR_32RegClass.contains(Reg))
RC = &AMDGPU::VGPR_32RegClass;
else if (AMDGPU::SGPR_32RegClass.contains(Reg))
RC = &AMDGPU::SGPR_32RegClass;
else
llvm_unreachable("Unexpected register class in LowerFormalArguments!");
EVT ValVT = VA.getValVT();
Reg = MF.addLiveIn(Reg, RC);
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
if (Arg.Flags.isSRet()) {
// The return object should be reasonably addressable.
// FIXME: This helps when the return is a real sret. If it is an
// automatically inserted sret (i.e. CanLowerReturn returns false), an
// extra copy is inserted in SelectionDAGBuilder which obscures this.
unsigned NumBits
= 32 - getSubtarget()->getKnownHighZeroBitsForFrameIndex();
Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), NumBits)));
}
// If this is an 8 or 16-bit value, it is really passed promoted
// to 32 bits. Insert an assert[sz]ext to capture this, then
// truncate to the right size.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, ValVT, Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::AssertSext, DL, VT, Val,
DAG.getValueType(ValVT));
Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::AssertZext, DL, VT, Val,
DAG.getValueType(ValVT));
Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::TRUNCATE, DL, ValVT, Val);
break;
default:
llvm_unreachable("Unknown loc info!");
}
InVals.push_back(Val);
}
// Start adding system SGPRs.
if (IsEntryFunc) {
allocateSystemSGPRs(CCInfo, MF, *Info, CallConv, IsGraphics);
} else {
CCInfo.AllocateReg(Info->getScratchRSrcReg());
if (!IsGraphics)
allocateSpecialInputSGPRs(CCInfo, MF, *TRI, *Info);
}
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
ArgUsageInfo.setFuncArgInfo(Fn, Info->getArgInfo());
unsigned StackArgSize = CCInfo.getNextStackOffset();
Info->setBytesInStackArgArea(StackArgSize);
return Chains.empty() ? Chain :
DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
// TODO: If return values can't fit in registers, we should return as many as
// possible in registers before passing on stack.
bool SITargetLowering::CanLowerReturn(
CallingConv::ID CallConv,
MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
// Replacing returns with sret/stack usage doesn't make sense for shaders.
// FIXME: Also sort of a workaround for custom vector splitting in LowerReturn
// for shaders. Vector types should be explicitly handled by CC.
if (AMDGPU::isEntryFunctionCC(CallConv))
return true;
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
}
SDValue
SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
if (AMDGPU::isKernel(CallConv)) {
return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
OutVals, DL, DAG);
}
bool IsShader = AMDGPU::isShader(CallConv);
Info->setIfReturnsVoid(Outs.empty());
bool IsWaveEnd = Info->returnsVoid() && IsShader;
// CCValAssign - represent the assignment of the return value to a location.
SmallVector<CCValAssign, 48> RVLocs;
SmallVector<ISD::OutputArg, 48> Splits;
// CCState - Info about the registers and stack slots.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Copy the result values into the output registers.
for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
++I, ++RealRVLocIdx) {
CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
// TODO: Partially return in registers if return values don't fit.
SDValue Arg = OutVals[RealRVLocIdx];
// Copied from other backends.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
default:
llvm_unreachable("Unknown loc info!");
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
// FIXME: Does sret work properly?
if (!Info->isEntryFunction()) {
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
for (; *I; ++I) {
if (AMDGPU::SReg_64RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i64));
else if (AMDGPU::SReg_32RegClass.contains(*I))
RetOps.push_back(DAG.getRegister(*I, MVT::i32));
else
llvm_unreachable("Unexpected register class in CSRsViaCopy!");
}
}
}
// Update chain and glue.
RetOps[0] = Chain;
if (Flag.getNode())
RetOps.push_back(Flag);
unsigned Opc = AMDGPUISD::ENDPGM;
if (!IsWaveEnd)
Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
SDValue SITargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool IsThisReturn,
SDValue ThisVal) const {
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv, IsVarArg);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
SDValue Val;
if (VA.isRegLoc()) {
Val = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
} else if (VA.isMemLoc()) {
report_fatal_error("TODO: return values in memory");
} else
llvm_unreachable("unknown argument location type");
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
break;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
DAG.getValueType(VA.getValVT()));
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
break;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
break;
default:
llvm_unreachable("Unknown loc info!");
}
InVals.push_back(Val);
}
return Chain;
}
// Add code to pass the special inputs required by the features in use,
// separate from the explicit user arguments present in the IR.
void SITargetLowering::passSpecialInputs(
CallLoweringInfo &CLI,
CCState &CCInfo,
const SIMachineFunctionInfo &Info,
SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains,
SDValue Chain) const {
// If we don't have a call site, this was a call inserted by
// legalization. These can never use special inputs.
if (!CLI.CB)
return;
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
const Function &F = DAG.getMachineFunction().getFunction();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const AMDGPUFunctionArgInfo &CallerArgInfo = Info.getArgInfo();
const AMDGPUFunctionArgInfo *CalleeArgInfo
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
if (const Function *CalleeFunc = CLI.CB->getCalledFunction()) {
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
CalleeArgInfo = &ArgUsageInfo.lookupFuncArgInfo(*CalleeFunc);
}
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
StringLiteral> ImplicitAttrs[] = {
{AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
{AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
{AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
{AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
{AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
{AMDGPUFunctionArgInfo::LDS_KERNEL_ID, "amdgpu-no-lds-kernel-id"},
};
for (auto Attr : ImplicitAttrs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
// If the call site carries the corresponding "amdgpu-no-*" attribute, the
// callee does not need this input, so skip copying the value.
if (CLI.CB->hasFnAttr(Attr.second))
continue;
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
continue;
const ArgDescriptor *IncomingArg;
const TargetRegisterClass *IncomingArgRC;
LLT Ty;
std::tie(IncomingArg, IncomingArgRC, Ty) =
CallerArgInfo.getPreloadedValue(InputID);
assert(IncomingArgRC == ArgRC);
// All special arguments are ints for now.
EVT ArgVT = TRI->getSpillSize(*ArgRC) == 8 ? MVT::i64 : MVT::i32;
SDValue InputReg;
if (IncomingArg) {
InputReg = loadInputValue(DAG, ArgRC, ArgVT, DL, *IncomingArg);
} else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
InputReg = getImplicitArgPtr(DAG, DL);
} else if (InputID == AMDGPUFunctionArgInfo::LDS_KERNEL_ID) {
Optional<uint32_t> Id = AMDGPUMachineFunction::getLDSKernelIdMetadata(F);
if (Id.has_value()) {
InputReg = DAG.getConstant(Id.value(), DL, ArgVT);
} else {
InputReg = DAG.getUNDEF(ArgVT);
}
} else {
// We may have proven the input wasn't needed, although the ABI still
// requires it. We just need to allocate the register appropriately.
InputReg = DAG.getUNDEF(ArgVT);
}
if (OutgoingArg->isRegister()) {
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
report_fatal_error("failed to allocate implicit input argument");
} else {
unsigned SpecialArgOffset =
CCInfo.AllocateStack(ArgVT.getStoreSize(), Align(4));
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
// Pack the workitem IDs into a single register, or pass them as-is if they
// are already packed.
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT Ty;
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
if (!OutgoingArg)
std::tie(OutgoingArg, ArgRC, Ty) =
CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
if (!OutgoingArg)
return;
const ArgDescriptor *IncomingArgX = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X));
const ArgDescriptor *IncomingArgY = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Y));
const ArgDescriptor *IncomingArgZ = std::get<0>(
CallerArgInfo.getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_Z));
SDValue InputReg;
SDLoc SL;
const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
// If the incoming IDs are not packed, we need to pack them.
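// A sketch of the packed layout assumed by the shifts below:
//   bits [9:0]   workitem ID X
//   bits [19:10] workitem ID Y
//   bits [29:20] workitem ID Z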
if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
NeedWorkItemIDX) {
if (Subtarget->getMaxWorkitemID(F, 0) != 0) {
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
} else {
InputReg = DAG.getConstant(0, DL, MVT::i32);
}
}
if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
NeedWorkItemIDY && Subtarget->getMaxWorkitemID(F, 1) != 0) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
InputReg = InputReg.getNode() ?
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
}
if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
NeedWorkItemIDZ && Subtarget->getMaxWorkitemID(F, 2) != 0) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
InputReg = InputReg.getNode() ?
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
}
if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
if (!IncomingArgX && !IncomingArgY && !IncomingArgZ) {
// We're in a situation where the outgoing function requires the workitem
// ID, but the calling function does not have it (e.g. a graphics function
// calling a C-calling-convention function). This is illegal, but we need
// to produce something.
InputReg = DAG.getUNDEF(MVT::i32);
} else {
// The workitem IDs are already packed, so any present incoming argument
// will carry all of the required fields.
ArgDescriptor IncomingArg = ArgDescriptor::createArg(
IncomingArgX ? *IncomingArgX :
IncomingArgY ? *IncomingArgY :
*IncomingArgZ, ~0u);
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, IncomingArg);
}
}
if (OutgoingArg->isRegister()) {
if (InputReg)
RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
CCInfo.AllocateReg(OutgoingArg->getRegister());
} else {
unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
if (InputReg) {
SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
SpecialArgOffset);
MemOpChains.push_back(ArgStore);
}
}
}
static bool canGuaranteeTCO(CallingConv::ID CC) {
return CC == CallingConv::Fast;
}
/// Return true if we might ever do TCO for calls with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::AMDGPU_Gfx:
return true;
default:
return canGuaranteeTCO(CC);
}
}
bool SITargetLowering::isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
return false;
// For a divergent call target, we need to do a waterfall loop over the
// possible callees which precludes us from using a simple jump.
if (Callee->isDivergent())
return false;
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
// Kernels aren't callable and don't have a live-in return address, so it
// doesn't make sense to do a tail call from entry functions.
if (!CallerPreserved)
return false;
bool CCMatch = CallerCC == CalleeCC;
if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
if (canGuaranteeTCO(CalleeCC) && CCMatch)
return true;
return false;
}
// TODO: Can we handle var args?
if (IsVarArg)
return false;
for (const Argument &Arg : CallerF.args()) {
if (Arg.hasByValAttr())
return false;
}
LLVMContext &Ctx = *DAG.getContext();
// Check that the call results are passed in the same way.
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, Ctx, Ins,
CCAssignFnForCall(CalleeCC, IsVarArg),
CCAssignFnForCall(CallerCC, IsVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
if (!CCMatch) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
// Nothing more to check if the callee is taking no arguments.
if (Outs.empty())
return true;
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, Ctx);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, IsVarArg));
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
// If the stack arguments for this call do not fit into our own save area,
// then the call cannot be made a tail call.
// TODO: Is this really necessary?
if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
return parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals);
}
bool SITargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
if (!CI->isTailCall())
return false;
const Function *ParentFn = CI->getParent()->getParent();
if (AMDGPU::isEntryFunctionCC(ParentFn->getCallingConv()))
return false;
return true;
}
// The wave scratch offset register is used as the global base pointer.
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
bool IsSibCall = false;
bool IsThisReturn = false;
MachineFunction &MF = DAG.getMachineFunction();
if (Callee.isUndef() || isNullConstant(Callee)) {
if (!CLI.IsTailCall) {
for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
}
return Chain;
}
if (IsVarArg) {
return lowerUnhandledCall(CLI, InVals,
"unsupported call to variadic function ");
}
if (!CLI.CB)
report_fatal_error("unsupported libcall legalization");
if (IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
return lowerUnhandledCall(CLI, InVals,
"unsupported required tail call to function ");
}
if (AMDGPU::isShader(CallConv)) {
// Note the issue is with the CC of the called function, not of the call
// itself.
return lowerUnhandledCall(CLI, InVals,
"unsupported call to a shader function ");
}
if (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
CallConv != CallingConv::AMDGPU_Gfx) {
// Only allow calls with specific calling conventions.
return lowerUnhandledCall(CLI, InVals,
"unsupported calling convention for call from "
"graphics shader of function ");
}
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall()) {
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
}
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
// A sibling call is one where we're under the usual C ABI and not planning
// to change that, but can still do a tail call:
if (!TailCallOpt && IsTailCall)
IsSibCall = true;
if (IsTailCall)
++NumTailCalls;
}
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);
if (CallConv != CallingConv::AMDGPU_Gfx) {
// With a fixed ABI, allocate fixed registers before user arguments.
passSpecialInputs(CLI, CCInfo, *Info, RegsToPass, MemOpChains, Chain);
}
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
// operands are already available in the caller's incoming argument space.
NumBytes = 0;
}
// FPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
// by this amount for a tail call. In a sibling call it must be 0 because the
// caller will deallocate the entire stack and the callee still expects its
// arguments to begin at SP+0. Completely unused for non-tail calls.
int32_t FPDiff = 0;
MachineFrameInfo &MFI = MF.getFrameInfo();
// Adjust the stack pointer for the new arguments. These operations are
// automatically eliminated by the prolog/epilog pass.
if (!IsSibCall) {
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
if (!Subtarget->enableFlatScratch()) {
SmallVector<SDValue, 4> CopyFromChains;
// In the HSA case, this should be an identity copy.
SDValue ScratchRSrcReg
= DAG.getCopyFromReg(Chain, DL, Info->getScratchRSrcReg(), MVT::v4i32);
RegsToPass.emplace_back(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, ScratchRSrcReg);
CopyFromChains.push_back(ScratchRSrcReg.getValue(1));
Chain = DAG.getTokenFactor(DL, CopyFromChains);
}
}
MVT PtrVT = MVT::i32;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
case CCValAssign::Full:
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
default:
llvm_unreachable("Unknown loc info!");
}
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
assert(VA.isMemLoc());
SDValue DstAddr;
MachinePointerInfo DstInfo;
unsigned LocMemOffset = VA.getLocMemOffset();
int32_t Offset = LocMemOffset;
SDValue PtrOff = DAG.getConstant(Offset, DL, PtrVT);
MaybeAlign Alignment;
if (IsTailCall) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
unsigned OpSize = Flags.isByVal() ?
Flags.getByValSize() : VA.getValVT().getStoreSize();
// FIXME: We can have better than the minimum byval required alignment.
Alignment =
Flags.isByVal()
? Flags.getNonZeroByValAlign()
: commonAlignment(Subtarget->getStackAlignment(), Offset);
Offset = Offset + FPDiff;
int FI = MFI.CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
// clobbered.
// FIXME: Why is this really necessary? This seems to just result in a
// lot of code to copy the stack and write them back to the same
// locations, which are supposed to be immutable?
Chain = addTokenForArgument(Chain, DAG, MFI, FI);
} else {
// Stores to the argument stack area are relative to the stack pointer.
SDValue SP = DAG.getCopyFromReg(Chain, DL, Info->getStackPtrOffsetReg(),
MVT::i32);
DstAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, SP, PtrOff);
DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
Alignment =
commonAlignment(Subtarget->getStackAlignment(), LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
SDValue SizeNode =
DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i32);
SDValue Cpy =
DAG.getMemcpy(Chain, DL, DstAddr, Arg, SizeNode,
Outs[i].Flags.getNonZeroByValAlign(),
/*isVol = */ false, /*AlwaysInline = */ true,
/*isTailCall = */ false, DstInfo,
MachinePointerInfo(AMDGPUAS::PRIVATE_ADDRESS));
MemOpChains.push_back(Cpy);
} else {
SDValue Store =
DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, Alignment);
MemOpChains.push_back(Store);
}
}
}
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
RegToPass.second, InFlag);
InFlag = Chain.getValue(1);
}
// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
Chain = DAG.getCALLSEQ_END(Chain,
DAG.getTargetConstant(NumBytes, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32),
InFlag, DL);
InFlag = Chain.getValue(1);
}
std::vector<SDValue> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
// Add a redundant copy of the callee global which will not be legalized, as
// we need direct access to the callee later.
if (GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = GSD->getGlobal();
Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64));
} else {
Ops.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
}
if (IsTailCall) {
// Each tail call may have to adjust the stack by a different amount, so
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
// into the call.
for (auto &RegToPass : RegsToPass) {
Ops.push_back(DAG.getRegister(RegToPass.first,
RegToPass.second.getValueType()));
}
// Add a register mask operand representing the call-preserved registers.
auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
MFI.setHasTailCall();
return DAG.getNode(AMDGPUISD::TC_RETURN, DL, NodeTys, Ops);
}
// Returns a chain and a flag for retval copy to use.
SDValue Call = DAG.getNode(AMDGPUISD::CALL, DL, NodeTys, Ops);
Chain = Call.getValue(0);
InFlag = Call.getValue(1);
uint64_t CalleePopBytes = NumBytes;
Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
InFlag, DL);
if (!Ins.empty())
InFlag = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
// except for applying the wave size scale to the increment amount.
SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(
SDValue Op, SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc dl(Op);
EVT VT = Op.getValueType();
SDValue Tmp1 = Op;
SDValue Tmp2 = Op.getValue(1);
SDValue Tmp3 = Op.getOperand(2);
SDValue Chain = Tmp1.getOperand(0);
Register SPReg = Info->getStackPtrOffsetReg();
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
SDValue Size = Tmp2.getOperand(1);
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const TargetFrameLowering *TFL = ST.getFrameLowering();
unsigned Opc =
TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ?
ISD::ADD : ISD::SUB;
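// The stack pointer tracks the per-wave scratch offset, so a per-lane
// allocation must be scaled by the wavefront size. For example (a sketch),
// on a wave64 subtarget an 8-byte per-lane alloca advances SP by
// 8 << 6 == 512 bytes.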
SDValue ScaledSize = DAG.getNode(
ISD::SHL, dl, VT, Size,
DAG.getConstant(ST.getWavefrontSizeLog2(), dl, MVT::i32));
Align StackAlign = TFL->getStackAlign();
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
if (Alignment && *Alignment > StackAlign) {
Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
DAG.getConstant(-(uint64_t)Alignment->value()
<< ST.getWavefrontSizeLog2(),
dl, VT));
}
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
Tmp2 = DAG.getCALLSEQ_END(
Chain, DAG.getIntPtrConstant(0, dl, true),
DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
return DAG.getMergeValues({Tmp1, Tmp2}, dl);
}
SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
// We only handle constant sizes here to allow non-entry block, static sized
// allocas. A truly dynamic value is more difficult to support because we
// don't know if the size value is uniform or not. If the size isn't uniform,
// we would need to do a wave reduction to get the maximum size to know how
// much to increment the uniform stack pointer.
SDValue Size = Op.getOperand(1);
if (isa<ConstantSDNode>(Size))
return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
}
Register SITargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
.Case("m0", AMDGPU::M0)
.Case("exec", AMDGPU::EXEC)
.Case("exec_lo", AMDGPU::EXEC_LO)
.Case("exec_hi", AMDGPU::EXEC_HI)
.Case("flat_scratch", AMDGPU::FLAT_SCR)
.Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
.Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
.Default(Register());
if (Reg == AMDGPU::NoRegister) {
report_fatal_error(Twine("invalid register name \""
+ StringRef(RegName) + "\"."));
}
if (!Subtarget->hasFlatScrRegister() &&
Subtarget->getRegisterInfo()->regsOverlap(Reg, AMDGPU::FLAT_SCR)) {
report_fatal_error(Twine("invalid register \""
+ StringRef(RegName) + "\" for subtarget."));
}
switch (Reg) {
case AMDGPU::M0:
case AMDGPU::EXEC_LO:
case AMDGPU::EXEC_HI:
case AMDGPU::FLAT_SCR_LO:
case AMDGPU::FLAT_SCR_HI:
if (VT.getSizeInBits() == 32)
return Reg;
break;
case AMDGPU::EXEC:
case AMDGPU::FLAT_SCR:
if (VT.getSizeInBits() == 64)
return Reg;
break;
default:
llvm_unreachable("missing register type checking");
}
report_fatal_error(Twine("invalid type for register \""
+ StringRef(RegName) + "\"."));
}
// If kill is not the last instruction, split the block so kill is always a
// proper terminator.
MachineBasicBlock *
SITargetLowering::splitKillBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MI.setDesc(TII->getKillTerminatorFromPseudo(MI.getOpcode()));
return SplitBB;
}
// Split block \p MBB at \p MI so as to insert a loop. If \p InstInLoop is
// true, \p MI will be the only instruction in the loop body block. Otherwise,
// it will be the first instruction in the remainder block.
//
/// \returns { LoopBody, Remainder }
static std::pair<MachineBasicBlock *, MachineBasicBlock *>
splitBlockForLoop(MachineInstr &MI, MachineBasicBlock &MBB, bool InstInLoop) {
MachineFunction *MF = MBB.getParent();
MachineBasicBlock::iterator I(&MI);
// To insert the loop we need to split the block. Move everything after this
// point to a new block, and insert a new empty block between the two.
MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
MachineFunction::iterator MBBI(MBB);
++MBBI;
MF->insert(MBBI, LoopBB);
MF->insert(MBBI, RemainderBB);
LoopBB->addSuccessor(LoopBB);
LoopBB->addSuccessor(RemainderBB);
// Move the rest of the block into a new block.
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
if (InstInLoop) {
auto Next = std::next(I);
// Move instruction to loop body.
LoopBB->splice(LoopBB->begin(), &MBB, I, Next);
// Move the rest of the block.
RemainderBB->splice(RemainderBB->begin(), &MBB, Next, MBB.end());
} else {
RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
}
MBB.addSuccessor(LoopBB);
return std::make_pair(LoopBB, RemainderBB);
}
/// Insert \p MI into a BUNDLE with an S_WAITCNT 0 immediately following it.
void SITargetLowering::bundleInstWithWaitcnt(MachineInstr &MI) const {
MachineBasicBlock *MBB = MI.getParent();
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
auto I = MI.getIterator();
auto E = std::next(I);
BuildMI(*MBB, E, MI.getDebugLoc(), TII->get(AMDGPU::S_WAITCNT))
.addImm(0);
MIBundleBuilder Bundler(*MBB, I, E);
finalizeBundle(*MBB, Bundler.begin());
}
MachineBasicBlock *
SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
MachineBasicBlock *BB) const {
const DebugLoc &DL = MI.getDebugLoc();
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
MachineBasicBlock *LoopBB;
MachineBasicBlock *RemainderBB;
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
// Apparently kill flags are only valid if the def is in the same block?
if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0))
Src->setIsKill(false);
std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
MachineBasicBlock::iterator I = LoopBB->end();
const unsigned EncodedReg = AMDGPU::Hwreg::encodeHwreg(
AMDGPU::Hwreg::ID_TRAPSTS, AMDGPU::Hwreg::OFFSET_MEM_VIOL, 1);
// Clear TRAP_STS.MEM_VIOL
BuildMI(*LoopBB, LoopBB->begin(), DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
.addImm(0)
.addImm(EncodedReg);
bundleInstWithWaitcnt(MI);
Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
// Load and check TRAP_STS.MEM_VIOL
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_GETREG_B32), Reg)
.addImm(EncodedReg);
// FIXME: Do we need to use an isel pseudo that may clobber scc?
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CMP_LG_U32))
.addReg(Reg, RegState::Kill)
.addImm(0);
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
.addMBB(LoopBB);
return RemainderBB;
}
// Do a v_movrels_b32 or v_movreld_b32 for each unique value of \p IdxReg in the
// wavefront. If the value is uniform and just happens to be in a VGPR, this
// will only do one iteration. In the worst case, this will loop 64 times.
//
// TODO: Just use v_readlane_b32 if we know the VGPR has a uniform value.
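// Roughly, the emitted waterfall loop looks like this (a sketch, not exact
// MIR; the real code also carries PHIs for the result and the saved exec):
//
//   loop:
//     CurrentIdx = v_readfirstlane_b32 Idx
//     Cond       = v_cmp_eq_u32 CurrentIdx, Idx
//     NewExec    = s_and_saveexec exec, Cond
//     M0 (or an SGPR in GPR-index mode) = CurrentIdx + Offset
//     ... indexed access runs for the matching lanes ...
//     exec = exec ^ NewExec   ; retire the lanes just handled
//     s_cbranch_execnz loop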
static MachineBasicBlock::iterator
emitLoadM0FromVGPRLoop(const SIInstrInfo *TII, MachineRegisterInfo &MRI,
MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
const DebugLoc &DL, const MachineOperand &Idx,
unsigned InitReg, unsigned ResultReg, unsigned PhiReg,
unsigned InitSaveExecReg, int Offset, bool UseGPRIdxMode,
Register &SGPRIdxReg) {
MachineFunction *MF = OrigBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock::iterator I = LoopBB.begin();
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
Register PhiExec = MRI.createVirtualRegister(BoolRC);
Register NewExec = MRI.createVirtualRegister(BoolRC);
Register CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
Register CondReg = MRI.createVirtualRegister(BoolRC);
BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
.addReg(InitReg)
.addMBB(&OrigBB)
.addReg(ResultReg)
.addMBB(&LoopBB);
BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiExec)
.addReg(InitSaveExecReg)
.addMBB(&OrigBB)
.addReg(NewExec)
.addMBB(&LoopBB);
// Read the next variant <- also loop target.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
.addReg(Idx.getReg(), getUndefRegState(Idx.isUndef()));
// Compare the just read M0 value to all possible Idx values.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
.addReg(CurrentIdxReg)
.addReg(Idx.getReg(), 0, Idx.getSubReg());
// Update EXEC, save the original EXEC value to VCC.
BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_AND_SAVEEXEC_B32
: AMDGPU::S_AND_SAVEEXEC_B64),
NewExec)
.addReg(CondReg, RegState::Kill);
MRI.setSimpleHint(NewExec, CondReg);
if (UseGPRIdxMode) {
if (Offset == 0) {
SGPRIdxReg = CurrentIdxReg;
} else {
SGPRIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SGPRIdxReg)
.addReg(CurrentIdxReg, RegState::Kill)
.addImm(Offset);
}
} else {
// Move index from VCC into M0
if (Offset == 0) {
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(CurrentIdxReg, RegState::Kill);
} else {
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
.addReg(CurrentIdxReg, RegState::Kill)
.addImm(Offset);
}
}
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
MachineInstr *InsertPt =
BuildMI(LoopBB, I, DL, TII->get(ST.isWave32() ? AMDGPU::S_XOR_B32_term
: AMDGPU::S_XOR_B64_term), Exec)
.addReg(Exec)
.addReg(NewExec);
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
.addMBB(&LoopBB);
return InsertPt->getIterator();
}
// This has slightly sub-optimal register allocation when the source vector is
// killed by the read. The register allocator does not understand that the
// kill is per-workitem, so the vector is kept alive for the whole loop; we
// therefore end up not reusing a subregister from it and use one more VGPR
// than necessary. This VGPR was saved back when this was expanded after
// register allocation.
static MachineBasicBlock::iterator
loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
unsigned InitResultReg, unsigned PhiReg, int Offset,
bool UseGPRIdxMode, Register &SGPRIdxReg) {
MachineFunction *MF = MBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const auto *BoolXExecRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register DstReg = MI.getOperand(0).getReg();
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
Register TmpExec = MRI.createVirtualRegister(BoolXExecRC);
unsigned Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
unsigned MovExecOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
// Save the EXEC mask
BuildMI(MBB, I, DL, TII->get(MovExecOpc), SaveExec)
.addReg(Exec);
MachineBasicBlock *LoopBB;
MachineBasicBlock *RemainderBB;
std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, MBB, false);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
InitResultReg, DstReg, PhiReg, TmpExec,
Offset, UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock* LandingPad = MF->CreateMachineBasicBlock();
MachineFunction::iterator MBBI(LoopBB);
++MBBI;
MF->insert(MBBI, LandingPad);
LoopBB->removeSuccessor(RemainderBB);
LandingPad->addSuccessor(RemainderBB);
LoopBB->addSuccessor(LandingPad);
MachineBasicBlock::iterator First = LandingPad->begin();
BuildMI(*LandingPad, First, DL, TII->get(MovExecOpc), Exec)
.addReg(SaveExec);
return InsPt;
}
// Returns {subreg index, remaining offset}.
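// For example (a sketch), with a 256-bit (8 x 32-bit) super-register:
// Offset 3 yields {sub3, 0}, while the out-of-bounds Offset 10 yields
// {sub0, 10} and is left for the dynamic-index expansion to handle.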
static std::pair<unsigned, int>
computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
const TargetRegisterClass *SuperRC,
unsigned VecReg,
int Offset) {
int NumElts = TRI.getRegSizeInBits(*SuperRC) / 32;
// Skip out of bounds offsets, or else we would end up using an undefined
// register.
if (Offset >= NumElts || Offset < 0)
return std::make_pair(AMDGPU::sub0, Offset);
return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0);
}
static void setM0ToIndexFromSGPR(const SIInstrInfo *TII,
MachineRegisterInfo &MRI, MachineInstr &MI,
int Offset) {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
assert(Idx->getReg() != AMDGPU::NoRegister);
if (Offset == 0) {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0).add(*Idx);
} else {
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
.add(*Idx)
.addImm(Offset);
}
}
static Register getIndirectSGPRIdx(const SIInstrInfo *TII,
MachineRegisterInfo &MRI, MachineInstr &MI,
int Offset) {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
if (Offset == 0)
return Idx->getReg();
Register Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
.add(*Idx)
.addImm(Offset);
return Tmp;
}
static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
MachineBasicBlock &MBB,
const GCNSubtarget &ST) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
Register Dst = MI.getOperand(0).getReg();
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
Register SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg);
const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
unsigned SubReg;
std::tie(SubReg, Offset)
= computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
const bool UseGPRIdxMode = ST.useVGPRIndexMode();
// Check for a SGPR index.
if (TII->getRegisterInfo().isSGPRClass(IdxRC)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
if (UseGPRIdxMode) {
// TODO: Look at the uses to avoid the copy. This may require rescheduling
// to avoid interfering with other uses, so probably requires a new
// optimization pass.
Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset);
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true);
BuildMI(MBB, I, DL, GPRIDXDesc, Dst)
.addReg(SrcReg)
.addReg(Idx)
.addImm(SubReg);
} else {
setM0ToIndexFromSGPR(TII, MRI, MI, Offset);
BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
}
MI.eraseFromParent();
return &MBB;
}
// Control flow needs to be inserted if indexing with a VGPR.
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator I(&MI);
Register PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
Register SGPRIdxReg;
auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset,
UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock *LoopBB = InsPt->getParent();
if (UseGPRIdxMode) {
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), true);
BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst)
.addReg(SrcReg)
.addReg(SGPRIdxReg)
.addImm(SubReg);
} else {
BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
.addReg(SrcReg, 0, SubReg)
.addReg(SrcReg, RegState::Implicit);
}
MI.eraseFromParent();
return LoopBB;
}
static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
MachineBasicBlock &MBB,
const GCNSubtarget &ST) {
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
Register Dst = MI.getOperand(0).getReg();
const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
const TargetRegisterClass *VecRC = MRI.getRegClass(SrcVec->getReg());
const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
// This can be an immediate, but will be folded later.
assert(Val->getReg());
unsigned SubReg;
std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
SrcVec->getReg(),
Offset);
const bool UseGPRIdxMode = ST.useVGPRIndexMode();
if (Idx->getReg() == AMDGPU::NoRegister) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
assert(Offset == 0);
BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dst)
.add(*SrcVec)
.add(*Val)
.addImm(SubReg);
MI.eraseFromParent();
return &MBB;
}
// Check for a SGPR index.
if (TII->getRegisterInfo().isSGPRClass(IdxRC)) {
MachineBasicBlock::iterator I(&MI);
const DebugLoc &DL = MI.getDebugLoc();
if (UseGPRIdxMode) {
Register Idx = getIndirectSGPRIdx(TII, MRI, MI, Offset);
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
BuildMI(MBB, I, DL, GPRIDXDesc, Dst)
.addReg(SrcVec->getReg())
.add(*Val)
.addReg(Idx)
.addImm(SubReg);
} else {
setM0ToIndexFromSGPR(TII, MRI, MI, Offset);
const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
TRI.getRegSizeInBits(*VecRC), 32, false);
BuildMI(MBB, I, DL, MovRelDesc, Dst)
.addReg(SrcVec->getReg())
.add(*Val)
.addImm(SubReg);
}
MI.eraseFromParent();
return &MBB;
}
// Control flow needs to be inserted if indexing with a VGPR.
if (Val->isReg())
MRI.clearKillFlags(Val->getReg());
const DebugLoc &DL = MI.getDebugLoc();
Register PhiReg = MRI.createVirtualRegister(VecRC);
Register SGPRIdxReg;
auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg, Offset,
UseGPRIdxMode, SGPRIdxReg);
MachineBasicBlock *LoopBB = InsPt->getParent();
if (UseGPRIdxMode) {
const MCInstrDesc &GPRIDXDesc =
TII->getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC), false);
BuildMI(*LoopBB, InsPt, DL, GPRIDXDesc, Dst)
.addReg(PhiReg)
.add(*Val)
.addReg(SGPRIdxReg)
.addImm(AMDGPU::sub0);
} else {
const MCInstrDesc &MovRelDesc = TII->getIndirectRegWriteMovRelPseudo(
TRI.getRegSizeInBits(*VecRC), 32, false);
BuildMI(*LoopBB, InsPt, DL, MovRelDesc, Dst)
.addReg(PhiReg)
.add(*Val)
.addImm(AMDGPU::sub0);
}
MI.eraseFromParent();
return LoopBB;
}
MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MachineFunction *MF = BB->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
switch (MI.getOpcode()) {
case AMDGPU::S_UADDO_PSEUDO:
case AMDGPU::S_USUBO_PSEUDO: {
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest0 = MI.getOperand(0);
MachineOperand &Dest1 = MI.getOperand(1);
MachineOperand &Src0 = MI.getOperand(2);
MachineOperand &Src1 = MI.getOperand(3);
unsigned Opc = (MI.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
? AMDGPU::S_ADD_I32
: AMDGPU::S_SUB_I32;
BuildMI(*BB, MI, DL, TII->get(Opc), Dest0.getReg()).add(Src0).add(Src1);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CSELECT_B64), Dest1.getReg())
.addImm(1)
.addImm(0);
MI.eraseFromParent();
return BB;
}
case AMDGPU::S_ADD_U64_PSEUDO:
case AMDGPU::S_SUB_U64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const TargetRegisterClass *BoolRC = TRI->getBoolRC();
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
MachineOperand Src0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
MachineOperand Src0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
MachineOperand Src1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, BoolRC, AMDGPU::sub0, &AMDGPU::SReg_32RegClass);
MachineOperand Src1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, BoolRC, AMDGPU::sub1, &AMDGPU::SReg_32RegClass);
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
unsigned LoOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned HiOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0).add(Src0Sub0).add(Src1Sub0);
BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1).add(Src0Sub1).add(Src1Sub1);
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
.addReg(DestSub1)
.addImm(AMDGPU::sub1);
MI.eraseFromParent();
return BB;
}
case AMDGPU::V_ADD_U64_PSEUDO:
case AMDGPU::V_SUB_U64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const DebugLoc &DL = MI.getDebugLoc();
bool IsAdd = (MI.getOpcode() == AMDGPU::V_ADD_U64_PSEUDO);
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &Src0 = MI.getOperand(1);
MachineOperand &Src1 = MI.getOperand(2);
if (IsAdd && ST.hasLshlAddB64()) {
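// With a shift amount of 0, v_lshl_add_u64 computes (Src0 << 0) + Src1,
// i.e. a plain 64-bit add in a single instruction.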
auto Add = BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_LSHL_ADD_U64_e64),
Dest.getReg())
.add(Src0)
.addImm(0)
.add(Src1);
TII->legalizeOperands(*Add);
MI.eraseFromParent();
return BB;
}
const auto *CarryRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register CarryReg = MRI.createVirtualRegister(CarryRC);
Register DeadCarryReg = MRI.createVirtualRegister(CarryRC);
const TargetRegisterClass *Src0RC = Src0.isReg()
? MRI.getRegClass(Src0.getReg())
: &AMDGPU::VReg_64RegClass;
const TargetRegisterClass *Src1RC = Src1.isReg()
? MRI.getRegClass(Src1.getReg())
: &AMDGPU::VReg_64RegClass;
const TargetRegisterClass *Src0SubRC =
TRI->getSubRegClass(Src0RC, AMDGPU::sub0);
const TargetRegisterClass *Src1SubRC =
TRI->getSubRegClass(Src1RC, AMDGPU::sub1);
MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
MachineOperand SrcReg1Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
MachineOperand SrcReg0Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
MachineOperand SrcReg1Sub1 = TII->buildExtractSubRegOrImm(
MI, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
unsigned LoOpc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
MachineInstr *LoHalf = BuildMI(*BB, MI, DL, TII->get(LoOpc), DestSub0)
.addReg(CarryReg, RegState::Define)
.add(SrcReg0Sub0)
.add(SrcReg1Sub0)
.addImm(0); // clamp bit
unsigned HiOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
MachineInstr *HiHalf =
BuildMI(*BB, MI, DL, TII->get(HiOpc), DestSub1)
.addReg(DeadCarryReg, RegState::Define | RegState::Dead)
.add(SrcReg0Sub1)
.add(SrcReg1Sub1)
.addReg(CarryReg, RegState::Kill)
.addImm(0); // clamp bit
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest.getReg())
.addReg(DestSub0)
.addImm(AMDGPU::sub0)
.addReg(DestSub1)
.addImm(AMDGPU::sub1);
TII->legalizeOperands(*LoHalf);
TII->legalizeOperands(*HiHalf);
MI.eraseFromParent();
return BB;
}
case AMDGPU::S_ADD_CO_PSEUDO:
case AMDGPU::S_SUB_CO_PSEUDO: {
// This pseudo can only be selected from a uniform add/subcarry node.
// All of its VGPR operands are therefore assumed to be splat vectors.
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
MachineBasicBlock::iterator MII = MI;
const DebugLoc &DL = MI.getDebugLoc();
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &CarryDest = MI.getOperand(1);
MachineOperand &Src0 = MI.getOperand(2);
MachineOperand &Src1 = MI.getOperand(3);
MachineOperand &Src2 = MI.getOperand(4);
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
? AMDGPU::S_ADDC_U32
: AMDGPU::S_SUBB_U32;
if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
.addReg(Src0.getReg());
Src0.setReg(RegOp0);
}
if (Src1.isReg() && TRI->isVectorRegister(MRI, Src1.getReg())) {
Register RegOp1 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp1)
.addReg(Src1.getReg());
Src1.setReg(RegOp1);
}
Register RegOp2 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
if (TRI->isVectorRegister(MRI, Src2.getReg())) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp2)
.addReg(Src2.getReg());
Src2.setReg(RegOp2);
}
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
assert(WaveSize == 64 || WaveSize == 32);
if (WaveSize == 64) {
if (ST.hasScalarCompareEq64()) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
.addReg(Src2.getReg())
.addImm(0);
} else {
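// Without a 64-bit scalar compare, test Src2 != 0 by OR-ing the two
// 32-bit halves together and comparing the result against zero.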
const TargetRegisterClass *SubRC =
TRI->getSubRegClass(Src2RC, AMDGPU::sub0);
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub1, SubRC);
Register Src2_32 = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_OR_B32), Src2_32)
.add(Src2Sub0)
.add(Src2Sub1);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
.addReg(Src2_32, RegState::Kill)
.addImm(0);
}
} else {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMPK_LG_U32))
.addReg(Src2.getReg())
.addImm(0);
}
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
unsigned SelOpc =
(WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
.addImm(-1)
.addImm(0);
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_INIT_M0: {
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.add(MI.getOperand(0));
MI.eraseFromParent();
return BB;
}
case AMDGPU::GET_GROUPSTATICSIZE: {
assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.add(MI.getOperand(0))
.addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_INDIRECT_SRC_V1:
case AMDGPU::SI_INDIRECT_SRC_V2:
case AMDGPU::SI_INDIRECT_SRC_V4:
case AMDGPU::SI_INDIRECT_SRC_V8:
case AMDGPU::SI_INDIRECT_SRC_V16:
case AMDGPU::SI_INDIRECT_SRC_V32:
return emitIndirectSrc(MI, *BB, *getSubtarget());
case AMDGPU::SI_INDIRECT_DST_V1:
case AMDGPU::SI_INDIRECT_DST_V2:
case AMDGPU::SI_INDIRECT_DST_V4:
case AMDGPU::SI_INDIRECT_DST_V8:
case AMDGPU::SI_INDIRECT_DST_V16:
case AMDGPU::SI_INDIRECT_DST_V32:
return emitIndirectDst(MI, *BB, *getSubtarget());
case AMDGPU::SI_KILL_F32_COND_IMM_PSEUDO:
case AMDGPU::SI_KILL_I1_PSEUDO:
return splitKillBlock(MI, BB);
case AMDGPU::V_CNDMASK_B64_PSEUDO: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
Register Dst = MI.getOperand(0).getReg();
Register Src0 = MI.getOperand(1).getReg();
Register Src1 = MI.getOperand(2).getReg();
const DebugLoc &DL = MI.getDebugLoc();
Register SrcCond = MI.getOperand(3).getReg();
Register DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
const auto *CondRC = TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
Register SrcCondCopy = MRI.createVirtualRegister(CondRC);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::COPY), SrcCondCopy)
.addReg(SrcCond);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
.addImm(0)
.addReg(Src0, 0, AMDGPU::sub0)
.addImm(0)
.addReg(Src1, 0, AMDGPU::sub0)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
.addImm(0)
.addReg(Src0, 0, AMDGPU::sub1)
.addImm(0)
.addReg(Src1, 0, AMDGPU::sub1)
.addReg(SrcCondCopy);
BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
.addReg(DstLo)
.addImm(AMDGPU::sub0)
.addReg(DstHi)
.addImm(AMDGPU::sub1);
MI.eraseFromParent();
return BB;
}
case AMDGPU::SI_BR_UNDEF: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
.add(MI.getOperand(0));
Br->getOperand(1).setIsUndef(true); // read undef SCC
MI.eraseFromParent();
return BB;
}
case AMDGPU::ADJCALLSTACKUP:
case AMDGPU::ADJCALLSTACKDOWN: {
const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
MachineInstrBuilder MIB(*MF, &MI);
MIB.addReg(Info->getStackPtrOffsetReg(), RegState::ImplicitDefine)
.addReg(Info->getStackPtrOffsetReg(), RegState::Implicit);
return BB;
}
case AMDGPU::SI_CALL_ISEL: {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const DebugLoc &DL = MI.getDebugLoc();
unsigned ReturnAddrReg = TII->getRegisterInfo().getReturnAddressReg(*MF);
MachineInstrBuilder MIB;
MIB = BuildMI(*BB, MI, DL, TII->get(AMDGPU::SI_CALL), ReturnAddrReg);
for (const MachineOperand &MO : MI.operands())
MIB.add(MO);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return BB;
}
case AMDGPU::V_ADD_CO_U32_e32:
case AMDGPU::V_SUB_CO_U32_e32:
case AMDGPU::V_SUBREV_CO_U32_e32: {
// TODO: Define distinct V_*_I32_Pseudo instructions instead.
const DebugLoc &DL = MI.getDebugLoc();
unsigned Opc = MI.getOpcode();
bool NeedClampOperand = false;
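// If the e32 form is not available on this subtarget, fall back to the
// VOP3 (e64) encoding, which carries an explicit clamp operand.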
if (TII->pseudoToMCOpcode(Opc) == -1) {
Opc = AMDGPU::getVOPe64(Opc);
NeedClampOperand = true;
}
auto I = BuildMI(*BB, MI, DL, TII->get(Opc), MI.getOperand(0).getReg());
if (TII->isVOP3(*I)) {
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
I.addReg(TRI->getVCC(), RegState::Define);
}
I.add(MI.getOperand(1))
.add(MI.getOperand(2));
if (NeedClampOperand)
I.addImm(0); // clamp bit for e64 encoding
TII->legalizeOperands(*I);
MI.eraseFromParent();
return BB;
}
case AMDGPU::V_ADDC_U32_e32:
case AMDGPU::V_SUBB_U32_e32:
case AMDGPU::V_SUBBREV_U32_e32:
// These instructions have an implicit use of vcc which counts towards the
// constant bus limit.
TII->legalizeOperands(MI);
return BB;
case AMDGPU::DS_GWS_INIT:
case AMDGPU::DS_GWS_SEMA_BR:
case AMDGPU::DS_GWS_BARRIER:
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::data0);
LLVM_FALLTHROUGH;
case AMDGPU::DS_GWS_SEMA_V:
case AMDGPU::DS_GWS_SEMA_P:
case AMDGPU::DS_GWS_SEMA_RELEASE_ALL:
// An s_waitcnt 0 is required to be the instruction immediately following.
if (getSubtarget()->hasGWSAutoReplay()) {
bundleInstWithWaitcnt(MI);
return BB;
}
return emitGWSMemViolTestLoop(MI, BB);
case AMDGPU::S_SETREG_B32: {
// Try to optimize cases that only set the denormal mode or rounding mode.
//
// If the s_setreg_b32 fully sets all of the bits in the rounding mode or
// denormal mode to a constant, we can use s_round_mode or s_denorm_mode
// instead.
//
// FIXME: This could be a predicate on the immediate, but tablegen doesn't
// allow a no-side-effect instruction in the output of a side-effecting
// pattern.
unsigned ID, Offset, Width;
AMDGPU::Hwreg::decodeHwreg(MI.getOperand(1).getImm(), ID, Offset, Width);
if (ID != AMDGPU::Hwreg::ID_MODE)
return BB;
const unsigned WidthMask = maskTrailingOnes<unsigned>(Width);
const unsigned SetMask = WidthMask << Offset;
if (getSubtarget()->hasDenormModeInst()) {
unsigned SetDenormOp = 0;
unsigned SetRoundOp = 0;
// The dedicated instructions can only set the whole denorm or round mode
// at once, not a subset of bits in either.
if (SetMask ==
(AMDGPU::Hwreg::FP_ROUND_MASK | AMDGPU::Hwreg::FP_DENORM_MASK)) {
// If this fully sets both the round and denorm mode, emit the two
// dedicated instructions for these.
SetRoundOp = AMDGPU::S_ROUND_MODE;
SetDenormOp = AMDGPU::S_DENORM_MODE;
} else if (SetMask == AMDGPU::Hwreg::FP_ROUND_MASK) {
SetRoundOp = AMDGPU::S_ROUND_MODE;
} else if (SetMask == AMDGPU::Hwreg::FP_DENORM_MASK) {
SetDenormOp = AMDGPU::S_DENORM_MODE;
}
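// For example (a sketch): a move of 0x21 into the full 8-bit FP mode
// field becomes s_round_mode 0x1 followed by s_denorm_mode 0x2.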
if (SetRoundOp || SetDenormOp) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
MachineInstr *Def = MRI.getVRegDef(MI.getOperand(0).getReg());
if (Def && Def->isMoveImmediate() && Def->getOperand(1).isImm()) {
unsigned ImmVal = Def->getOperand(1).getImm();
if (SetRoundOp) {
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetRoundOp))
.addImm(ImmVal & 0xf);
// If we also have the denorm mode, get just the denorm mode bits.
ImmVal >>= 4;
}
if (SetDenormOp) {
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(SetDenormOp))
.addImm(ImmVal & 0xf);
}
MI.eraseFromParent();
return BB;
}
}
}
// If only FP bits are touched, use the no-side-effects pseudo.
if ((SetMask & (AMDGPU::Hwreg::FP_ROUND_MASK |
AMDGPU::Hwreg::FP_DENORM_MASK)) == SetMask)
MI.setDesc(TII->get(AMDGPU::S_SETREG_B32_mode));
return BB;
}
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
}
bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
return isTypeLegal(VT.getScalarType());
}
bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const {
switch (Op.getValue(0).getSimpleValueType().SimpleTy) {
case MVT::f32:
return Subtarget->hasAtomicFaddRtnInsts();
case MVT::v2f16:
case MVT::f64:
return Subtarget->hasGFX90AInsts();
default:
return false;
}
}
bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
// This currently forces unfolding various combinations of fsub into fma with
// free fneg'd operands. As long as we have fast FMA (controlled by
// isFMAFasterThanFMulAndFAdd), we should perform these.
// When fma is quarter rate, for f64 where add / sub are at best half rate,
// most of these combines appear to be cycle neutral but save on instruction
// count / code size.
return true;
}
bool SITargetLowering::enableAggressiveFMAFusion(LLT Ty) const { return true; }
EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
EVT VT) const {
if (!VT.isVector()) {
return MVT::i1;
}
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
}
MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
// TODO: Should i16 always be used if legal? For now it would force VALU
// shifts.
return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
}
LLT SITargetLowering::getPreferredShiftAmountTy(LLT Ty) const {
return (Ty.getScalarSizeInBits() <= 16 && Subtarget->has16BitInsts())
? Ty.changeElementSize(16)
: Ty.changeElementSize(32);
}
// Answering this is somewhat tricky and depends on the specific device, since
// different devices have different rates for fma and for f64 operations in
// general.
//
// v_fma_f64 and v_mul_f64 always take the same number of cycles as each other
// regardless of which device (although the number of cycles differs between
// devices), so it is always profitable for f64.
//
// v_fma_f32 takes 4 or 16 cycles depending on the device, so it is profitable
// only on full rate devices. Normally, we should prefer selecting v_mad_f32
// which we can always do even without fused FP ops since it returns the same
// result as the separate operations and since it is always full
// rate. Therefore, we lie and report that it is not faster for f32. v_mad_f32,
// however, does not support denormals, so we do report fma as faster if we
// have a fast fma device and require denormals.
//
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
VT = VT.getScalarType();
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32: {
// If mad is not available this depends only on if f32 fma is full rate.
if (!Subtarget->hasMadMacF32Insts())
return Subtarget->hasFastFMAF32();
// Otherwise f32 mad is always full rate and returns the same result as
// the separate operations so should be preferred over fma.
// However, it does not support denormals.
if (hasFP32Denormals(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
return Subtarget->hasFastFMAF32() && Subtarget->hasDLInsts();
}
case MVT::f64:
return true;
case MVT::f16:
return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
default:
break;
}
return false;
}
bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
LLT Ty) const {
switch (Ty.getScalarSizeInBits()) {
case 16:
return isFMAFasterThanFMulAndFAdd(MF, MVT::f16);
case 32:
return isFMAFasterThanFMulAndFAdd(MF, MVT::f32);
case 64:
return isFMAFasterThanFMulAndFAdd(MF, MVT::f64);
default:
break;
}
return false;
}
bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
if (!Ty.isScalar())
return false;
if (Ty.getScalarSizeInBits() == 16)
return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
if (Ty.getScalarSizeInBits() == 32)
return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
return false;
}
bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
const SDNode *N) const {
// TODO: Check future ftz flag
// v_mad_f32/v_mac_f32 do not support denormals.
EVT VT = N->getValueType(0);
if (VT == MVT::f32)
return Subtarget->hasMadMacF32Insts() &&
!hasFP32Denormals(DAG.getMachineFunction());
if (VT == MVT::f16) {
return Subtarget->hasMadF16() &&
!hasFP64FP16Denormals(DAG.getMachineFunction());
}
return false;
}
//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
// wider vector type is legal.
SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4f16 || VT == MVT::v4i16);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
SDLoc SL(Op);
SDValue OpLo = DAG.getNode(Opc, SL, Lo.getValueType(), Lo,
Op->getFlags());
SDValue OpHi = DAG.getNode(Opc, SL, Hi.getValueType(), Hi,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
// Work around LegalizeDAG doing the wrong thing and fully scalarizing if the
// wider vector type is legal.
SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
VT == MVT::v32f32);
SDValue Lo0, Hi0;
std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
SDValue Lo1, Hi1;
std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
SDLoc SL(Op);
SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1,
Op->getFlags());
SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
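// For example, splitBinaryVectorOp turns (fadd v4f16 %a, %b) into two v2f16
// fadds whose results are concatenated back into a v4f16.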
SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v16i16 ||
VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
VT == MVT::v32f32);
SDValue Lo0, Hi0;
SDValue Op0 = Op.getOperand(0);
std::tie(Lo0, Hi0) = Op0.getValueType().isVector()
? DAG.SplitVectorOperand(Op.getNode(), 0)
: std::make_pair(Op0, Op0);
SDValue Lo1, Hi1;
std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
SDValue Lo2, Hi2;
std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
SDLoc SL(Op);
auto ResVT = DAG.GetSplitDestVTs(VT);
SDValue OpLo = DAG.getNode(Opc, SL, ResVT.first, Lo0, Lo1, Lo2,
Op->getFlags());
SDValue OpHi = DAG.getNode(Opc, SL, ResVT.second, Hi0, Hi1, Hi2,
Op->getFlags());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
}
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::LOAD: {
SDValue Result = LowerLOAD(Op, DAG);
assert((!Result.getNode() ||
Result.getNode()->getNumValues() == 2) &&
"Load should return a value and a chain");
return Result;
}
case ISD::FSIN:
case ISD::FCOS:
return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
return LowerGlobalAddress(MFI, Op, DAG);
}
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG);
case ISD::SCALAR_TO_VECTOR:
return lowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
return lowerFP_ROUND(Op, DAG);
case ISD::FPTRUNC_ROUND: {
unsigned Opc;
SDLoc DL(Op);
if (Op.getOperand(0)->getValueType(0) != MVT::f32)
return SDValue();
// Get the rounding mode from the last operand
int RoundMode = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
if (RoundMode == (int)RoundingMode::TowardPositive)
Opc = AMDGPUISD::FPTRUNC_ROUND_UPWARD;
else if (RoundMode == (int)RoundingMode::TowardNegative)
Opc = AMDGPUISD::FPTRUNC_ROUND_DOWNWARD;
else
return SDValue();
return DAG.getNode(Opc, DL, Op.getNode()->getVTList(), Op->getOperand(0));
}
case ISD::TRAP:
return lowerTRAP(Op, DAG);
case ISD::DEBUGTRAP:
return lowerDEBUGTRAP(Op, DAG);
case ISD::FABS:
case ISD::FNEG:
case ISD::FCANONICALIZE:
case ISD::BSWAP:
return splitUnaryVectorOp(Op, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
case ISD::FMA:
return splitTernaryVectorOp(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return LowerFP_TO_INT(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
case ISD::ADD:
case ISD::SUB:
case ISD::MUL:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::FADD:
case ISD::FMUL:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
case ISD::UADDSAT:
case ISD::USUBSAT:
case ISD::SADDSAT:
case ISD::SSUBSAT:
return splitBinaryVectorOp(Op, DAG);
case ISD::SMULO:
case ISD::UMULO:
return lowerXMULO(Op, DAG);
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
return lowerXMUL_LOHI(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return SDValue();
}
// Used for D16: Casts the result of an instruction into the right vector,
// packing values if loads return unpacked values.
static SDValue adjustLoadValueTypeImpl(SDValue Result, EVT LoadVT,
const SDLoc &DL,
SelectionDAG &DAG, bool Unpacked) {
if (!LoadVT.isVector())
return Result;
// Cast back to the original packed type or to a larger type that is a
// multiple of 32 bits for D16. Widening the return type is required for
// legalization.
EVT FittingLoadVT = LoadVT;
if ((LoadVT.getVectorNumElements() % 2) == 1) {
FittingLoadVT =
EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
LoadVT.getVectorNumElements() + 1);
}
if (Unpacked) { // From v2i32/v4i32 back to v2f16/v4f16.
// Truncate to v2i16/v4i16.
EVT IntLoadVT = FittingLoadVT.changeTypeToInteger();
// Work around the legalizer not scalarizing truncate after vector op
// legalization by truncating the extracted elements individually instead
// of creating an intermediate vector truncate.
SmallVector<SDValue, 4> Elts;
DAG.ExtractVectorElements(Result, Elts);
for (SDValue &Elt : Elts)
Elt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Elt);
// Pad illegal v1i16/v3f16 to v4i16
if ((LoadVT.getVectorNumElements() % 2) == 1)
Elts.push_back(DAG.getUNDEF(MVT::i16));
Result = DAG.getBuildVector(IntLoadVT, DL, Elts);
// Bitcast to original type (v2f16/v4f16).
return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
}
// Cast back to the original packed type.
return DAG.getNode(ISD::BITCAST, DL, FittingLoadVT, Result);
}
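// For example, with unpacked D16 a v4f16 load comes back as v4i32 with one
// half value in the low 16 bits of each dword; adjustLoadValueTypeImpl
// truncates each lane to i16, rebuilds a v4i16, and bitcasts the result to
// v4f16.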
SDValue SITargetLowering::adjustLoadValueType(unsigned Opcode,
MemSDNode *M,
SelectionDAG &DAG,
ArrayRef<SDValue> Ops,
bool IsIntrinsic) const {
SDLoc DL(M);
bool Unpacked = Subtarget->hasUnpackedD16VMem();
EVT LoadVT = M->getValueType(0);
EVT EquivLoadVT = LoadVT;
if (LoadVT.isVector()) {
if (Unpacked) {
EquivLoadVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
LoadVT.getVectorNumElements());
} else if ((LoadVT.getVectorNumElements() % 2) == 1) {
// Widen v3f16 to legal type
EquivLoadVT =
EVT::getVectorVT(*DAG.getContext(), LoadVT.getVectorElementType(),
LoadVT.getVectorNumElements() + 1);
}
}
// Change from v4f16/v2f16 to EquivLoadVT.
SDVTList VTList = DAG.getVTList(EquivLoadVT, MVT::Other);
SDValue Load
= DAG.getMemIntrinsicNode(
IsIntrinsic ? (unsigned)ISD::INTRINSIC_W_CHAIN : Opcode, DL,
VTList, Ops, M->getMemoryVT(),
M->getMemOperand());
SDValue Adjusted = adjustLoadValueTypeImpl(Load, LoadVT, DL, DAG, Unpacked);
return DAG.getMergeValues({ Adjusted, Load.getValue(1) }, DL);
}
SDValue SITargetLowering::lowerIntrinsicLoad(MemSDNode *M, bool IsFormat,
SelectionDAG &DAG,
ArrayRef<SDValue> Ops) const {
SDLoc DL(M);
EVT LoadVT = M->getValueType(0);
EVT EltType = LoadVT.getScalarType();
EVT IntVT = LoadVT.changeTypeToInteger();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_LOAD_FORMAT : AMDGPUISD::BUFFER_LOAD;
if (IsD16) {
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16, M, DAG, Ops);
}
// Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
if (!IsD16 && !LoadVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
if (isTypeLegal(LoadVT)) {
return getMemIntrinsicNode(Opc, DL, M->getVTList(), Ops, IntVT,
M->getMemOperand(), DAG);
}
EVT CastVT = getEquivalentMemType(*DAG.getContext(), LoadVT);
SDVTList VTList = DAG.getVTList(CastVT, MVT::Other);
SDValue MemNode = getMemIntrinsicNode(Opc, DL, VTList, Ops, CastVT,
M->getMemOperand(), DAG);
return DAG.getMergeValues(
{DAG.getNode(ISD::BITCAST, DL, LoadVT, MemNode), MemNode.getValue(1)},
DL);
}
static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
unsigned CondCode = CD->getZExtValue();
if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(CondCode)))
return DAG.getUNDEF(VT);
ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
SDLoc DL(N);
EVT CmpVT = LHS.getValueType();
if (CmpVT == MVT::i16 && !TLI.isTypeLegal(MVT::i16)) {
unsigned PromoteOp = ICmpInst::isSigned(IcInput) ?
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
LHS = DAG.getNode(PromoteOp, DL, MVT::i32, LHS);
RHS = DAG.getNode(PromoteOp, DL, MVT::i32, RHS);
}
ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
DAG.getCondCode(CCOpcode));
if (VT.bitsEq(CCVT))
return SetCC;
return DAG.getZExtOrTrunc(SetCC, DL, VT);
}
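// For example, lowerICMPIntrinsic turns llvm.amdgcn.icmp(i32 %a, i32 %b,
// i32 32), where 32 encodes ICmpInst::ICMP_EQ, into (AMDGPUISD::SETCC %a, %b,
// seteq), whose wave-sized mask result is then zero-extended or truncated to
// the intrinsic's return type.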
static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
const auto *CD = cast<ConstantSDNode>(N->getOperand(3));
unsigned CondCode = CD->getZExtValue();
if (!FCmpInst::isFPPredicate(static_cast<FCmpInst::Predicate>(CondCode)))
return DAG.getUNDEF(VT);
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
EVT CmpVT = Src0.getValueType();
SDLoc SL(N);
if (CmpVT == MVT::f16 && !TLI.isTypeLegal(CmpVT)) {
Src0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
Src1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
}
FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
Src1, DAG.getCondCode(CCOpcode));
if (VT.bitsEq(CCVT))
return SetCC;
return DAG.getZExtOrTrunc(SetCC, SL, VT);
}
static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(1);
SDLoc SL(N);
if (Src.getOpcode() == ISD::SETCC) {
// (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...)
return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0),
Src.getOperand(1), Src.getOperand(2));
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
if (Arg->isZero())
return DAG.getConstant(0, SL, VT);
// (ballot 1) -> EXEC/EXEC_LO
if (Arg->isOne()) {
Register Exec;
if (VT.getScalarSizeInBits() == 32)
Exec = AMDGPU::EXEC_LO;
else if (VT.getScalarSizeInBits() == 64)
Exec = AMDGPU::EXEC;
else
return SDValue();
return DAG.getCopyFromReg(DAG.getEntryNode(), SL, Exec, VT);
}
}
// (ballot (i1 $src)) -> (AMDGPUISD::SETCC (i32 (zext $src)) (i32 0)
// ISD::SETNE)
return DAG.getNode(
AMDGPUISD::SETCC, SL, VT, DAG.getZExtOrTrunc(Src, SL, MVT::i32),
DAG.getConstant(0, SL, MVT::i32), DAG.getCondCode(ISD::SETNE));
}
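// For example, on a wave64 target lowerBALLOTIntrinsic turns ballot(i1 true)
// into a copy from EXEC (i64), while a ballot of a setcc forwards the compare
// operands into a divergent AMDGPUISD::SETCC, yielding one result bit per
// active lane.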
void SITargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const {
switch (N->getOpcode()) {
case ISD::INSERT_VECTOR_ELT: {
if (SDValue Res = lowerINSERT_VECTOR_ELT(SDValue(N, 0), DAG))
Results.push_back(Res);
return;
}
case ISD::EXTRACT_VECTOR_ELT: {
if (SDValue Res = lowerEXTRACT_VECTOR_ELT(SDValue(N, 0), DAG))
Results.push_back(Res);
return;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
case Intrinsic::amdgcn_cvt_pkrtz: {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, SL, MVT::i32,
Src0, Src1);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
return;
}
case Intrinsic::amdgcn_cvt_pknorm_i16:
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16: {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
unsigned Opcode;
if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
Opcode = AMDGPUISD::CVT_PK_I16_I32;
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
EVT VT = N->getValueType(0);
if (isTypeLegal(VT))
Results.push_back(DAG.getNode(Opcode, SL, VT, Src0, Src1));
else {
SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
}
return;
}
}
break;
}
case ISD::INTRINSIC_W_CHAIN: {
if (SDValue Res = LowerINTRINSIC_W_CHAIN(SDValue(N, 0), DAG)) {
if (Res.getOpcode() == ISD::MERGE_VALUES) {
// FIXME: Hacky
for (unsigned I = 0; I < Res.getNumOperands(); I++) {
Results.push_back(Res.getOperand(I));
}
} else {
Results.push_back(Res);
Results.push_back(Res.getValue(1));
}
return;
}
break;
}
case ISD::SELECT: {
SDLoc SL(N);
EVT VT = N->getValueType(0);
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
SDValue LHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, SL, NewVT, N->getOperand(2));
EVT SelectVT = NewVT;
if (NewVT.bitsLT(MVT::i32)) {
LHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, LHS);
RHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, RHS);
SelectVT = MVT::i32;
}
SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, SelectVT,
N->getOperand(0), LHS, RHS);
if (NewVT != SelectVT)
NewSelect = DAG.getNode(ISD::TRUNCATE, SL, NewVT, NewSelect);
Results.push_back(DAG.getNode(ISD::BITCAST, SL, VT, NewSelect));
return;
}
case ISD::FNEG: {
if (N->getValueType(0) != MVT::v2f16)
break;
SDLoc SL(N);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(0));
SDValue Op = DAG.getNode(ISD::XOR, SL, MVT::i32,
BC,
DAG.getConstant(0x80008000, SL, MVT::i32));
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Op));
return;
}
case ISD::FABS: {
if (N->getValueType(0) != MVT::v2f16)
break;
SDLoc SL(N);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::i32, N->getOperand(0));
SDValue Op = DAG.getNode(ISD::AND, SL, MVT::i32,
BC,
DAG.getConstant(0x7fff7fff, SL, MVT::i32));
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Op));
return;
}
default:
break;
}
}
/// Helper function for LowerBRCOND
static SDNode *findUser(SDValue Value, unsigned Opcode) {
SDNode *Parent = Value.getNode();
for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
I != E; ++I) {
if (I.getUse().get() != Value)
continue;
if (I->getOpcode() == Opcode)
return *I;
}
return nullptr;
}
unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
case Intrinsic::amdgcn_if:
return AMDGPUISD::IF;
case Intrinsic::amdgcn_else:
return AMDGPUISD::ELSE;
case Intrinsic::amdgcn_loop:
return AMDGPUISD::LOOP;
case Intrinsic::amdgcn_end_cf:
llvm_unreachable("should not occur");
default:
return 0;
}
}
// break, if_break, else_break are all only used as inputs to loop, not
// directly as branch conditions.
return 0;
}
bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
const Triple &TT = getTargetMachine().getTargetTriple();
return (GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
AMDGPU::shouldEmitConstantsToTextSection(TT);
}
bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
// FIXME: Either avoid relying on address space here or change the default
// address space for functions to avoid the explicit check.
return (GV->getValueType()->isFunctionTy() ||
!isNonGlobalAddrSpace(GV->getAddressSpace())) &&
!shouldEmitFixup(GV) &&
!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
}
bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
}
bool SITargetLowering::shouldUseLDSConstAddress(const GlobalValue *GV) const {
if (!GV->hasExternalLinkage())
return true;
const auto OS = getTargetMachine().getTargetTriple().getOS();
return OS == Triple::AMDHSA || OS == Triple::AMDPAL;
}
/// This transforms the control flow intrinsics to get the branch destination
/// as the last parameter, and also switches the branch target with BR if the
/// need arises.
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SelectionDAG &DAG) const {
SDLoc DL(BRCOND);
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
SDNode *BR = nullptr;
SDNode *SetCC = nullptr;
if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine
SetCC = Intr;
Intr = SetCC->getOperand(0).getNode();
} else {
// Get the target from BR if we don't negate the condition
BR = findUser(BRCOND, ISD::BR);
assert(BR && "brcond missing unconditional branch user");
Target = BR->getOperand(1);
}
unsigned CFNode = isCFIntrinsic(Intr);
if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.
return BRCOND;
}
bool HaveChain = Intr->getOpcode() == ISD::INTRINSIC_VOID ||
Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN;
assert(!SetCC ||
(SetCC->getConstantOperandVal(1) == 1 &&
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
ISD::SETNE));
// operands of the new intrinsic call
SmallVector<SDValue, 4> Ops;
if (HaveChain)
Ops.push_back(BRCOND.getOperand(0));
Ops.append(Intr->op_begin() + (HaveChain ? 2 : 1), Intr->op_end());
Ops.push_back(Target);
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// build the new intrinsic call
SDNode *Result = DAG.getNode(CFNode, DL, DAG.getVTList(Res), Ops).getNode();
if (!HaveChain) {
SDValue Ops[] = {
SDValue(Result, 0),
BRCOND.getOperand(0)
};
Result = DAG.getMergeValues(Ops, DL).getNode();
}
if (BR) {
// Give the branch instruction our target
SDValue Ops[] = {
BR->getOperand(0),
BRCOND.getOperand(2)
};
SDValue NewBR = DAG.getNode(ISD::BR, DL, BR->getVTList(), Ops);
DAG.ReplaceAllUsesWith(BR, NewBR.getNode());
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
// Copy the intrinsic results to registers
for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
if (!CopyToReg)
continue;
Chain = DAG.getCopyToReg(
Chain, DL,
CopyToReg->getOperand(1),
SDValue(Result, i - 1),
SDValue());
DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
}
// Remove the old intrinsic from the chain
DAG.ReplaceAllUsesOfValueWith(
SDValue(Intr, Intr->getNumValues() - 1),
Intr->getOperand(0));
return Chain;
}
SDValue SITargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
// Checking the depth
if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0)
return DAG.getConstant(0, DL, VT);
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// Check for kernel and shader functions
if (Info->isEntryFunction())
return DAG.getConstant(0, DL, VT);
MachineFrameInfo &MFI = MF.getFrameInfo();
// There is a call to @llvm.returnaddress in this function
MFI.setReturnAddressIsTaken(true);
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
// Get the return address reg and mark it as an implicit live-in
Register Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT, Op.getNode()->isDivergent()));
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
}
SDValue SITargetLowering::getFPExtOrFPRound(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
EVT VT) const {
return Op.getValueType().bitsLE(VT) ?
DAG.getNode(ISD::FP_EXTEND, DL, VT, Op) :
DAG.getNode(ISD::FP_ROUND, DL, VT, Op,
DAG.getTargetConstant(0, DL, MVT::i32));
}
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f16 &&
"Do not know how to custom lower FP_ROUND for non-f16 type");
SDValue Src = Op.getOperand(0);
EVT SrcVT = Src.getValueType();
if (SrcVT != MVT::f64)
return Op;
SDLoc DL(Op);
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
}
SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
bool IsIEEEMode = Info->getMode().IEEE;
// FIXME: Assert during selection that this is only selected for
// ieee_mode. Currently a combine can produce the ieee version for non-ieee
// mode functions, but this happens to be OK since it's only done in cases
// where it is known that there is no sNaN.
if (IsIEEEMode)
return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
if (VT == MVT::v4f16 || VT == MVT::v8f16 || VT == MVT::v16f16)
return splitBinaryVectorOp(Op, DAG);
return Op;
}
SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
bool isSigned = Op.getOpcode() == ISD::SMULO;
if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
const APInt &C = RHSC->getAPIntValue();
// mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
if (C.isPowerOf2()) {
// smulo(x, signed_min) is the same as umulo(x, signed_min).
bool UseArithShift = isSigned && !C.isMinSignedValue();
SDValue ShiftAmt = DAG.getConstant(C.logBase2(), SL, MVT::i32);
SDValue Result = DAG.getNode(ISD::SHL, SL, VT, LHS, ShiftAmt);
SDValue Overflow = DAG.getSetCC(SL, MVT::i1,
DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
SL, VT, Result, ShiftAmt),
LHS, ISD::SETNE);
return DAG.getMergeValues({ Result, Overflow }, SL);
}
}
SDValue Result = DAG.getNode(ISD::MUL, SL, VT, LHS, RHS);
SDValue Top = DAG.getNode(isSigned ? ISD::MULHS : ISD::MULHU,
SL, VT, LHS, RHS);
SDValue Sign = isSigned
? DAG.getNode(ISD::SRA, SL, VT, Result,
DAG.getConstant(VT.getScalarSizeInBits() - 1, SL, MVT::i32))
: DAG.getConstant(0, SL, VT);
SDValue Overflow = DAG.getSetCC(SL, MVT::i1, Top, Sign, ISD::SETNE);
return DAG.getMergeValues({ Result, Overflow }, SL);
}
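// Worked example for the power-of-two path in lowerXMULO above: umulo(%x, 8)
// becomes { %x << 3, ((%x << 3) >> 3) != %x }; the logical shift round-trip
// changes the value exactly when the multiply overflowed.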
SDValue SITargetLowering::lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const {
if (Op->isDivergent()) {
// Select to V_MAD_[IU]64_[IU]32.
return Op;
}
if (Subtarget->hasSMulHi()) {
// Expand to S_MUL_I32 + S_MUL_HI_[IU]32.
return SDValue();
}
// The multiply is uniform but we would have to use V_MUL_HI_[IU]32 to
// calculate the high part, so we might as well do the whole thing with
// V_MAD_[IU]64_[IU]32.
return Op;
}
SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
if (Optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) {
switch (*HsaAbiVer) {
case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
return lowerTrapHsaQueuePtr(Op, DAG);
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return Subtarget->supportsGetDoorbellID() ?
lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
}
}
llvm_unreachable("Unknown trap handler");
}
SDValue SITargetLowering::lowerTrapEndpgm(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
}
SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
const SDLoc &DL, Align Alignment, ImplicitParameter Param) const {
MachineFunction &MF = DAG.getMachineFunction();
uint64_t Offset = getImplicitParameterOffset(MF, Param);
SDValue Ptr = lowerKernArgParameterPtr(DAG, DL, DAG.getEntryNode(), Offset);
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
SDValue SITargetLowering::lowerTrapHsaQueuePtr(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
SDValue QueuePtr;
// For code object version 5, QueuePtr is passed through implicit kernarg.
if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
QueuePtr =
loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR);
} else {
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
if (UserSGPR == AMDGPU::NoRegister) {
// We probably are in a function incorrectly marked with
// amdgpu-no-queue-ptr. This is undefined. We don't want to delete the
// trap, so just use a null pointer.
QueuePtr = DAG.getConstant(0, SL, MVT::i64);
} else {
QueuePtr = CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, UserSGPR,
MVT::i64);
}
}
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
ToReg,
DAG.getTargetConstant(TrapID, SL, MVT::i16),
SGPR01,
ToReg.getValue(1)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::lowerTrapHsa(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSATrap);
SDValue Ops[] = {
Chain,
DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::lowerDEBUGTRAP(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
MachineFunction &MF = DAG.getMachineFunction();
if (!Subtarget->isTrapHandlerEnabled() ||
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
DiagnosticInfoUnsupported NoTrap(MF.getFunction(),
"debugtrap handler not supported",
Op.getDebugLoc(),
DS_Warning);
LLVMContext &Ctx = MF.getFunction().getContext();
Ctx.diagnose(NoTrap);
return Chain;
}
uint64_t TrapID = static_cast<uint64_t>(GCNSubtarget::TrapID::LLVMAMDHSADebugTrap);
SDValue Ops[] = {
Chain,
DAG.getTargetConstant(TrapID, SL, MVT::i16)
};
return DAG.getNode(AMDGPUISD::TRAP, SL, MVT::Other, Ops);
}
SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
SelectionDAG &DAG) const {
// FIXME: Use inline constants (src_{shared, private}_base) instead.
if (Subtarget->hasApertureRegs()) {
unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
unsigned Encoding =
AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;
SDValue EncodingImm = DAG.getTargetConstant(Encoding, DL, MVT::i16);
SDValue ApertureReg = SDValue(
DAG.getMachineNode(AMDGPU::S_GETREG_B32, DL, MVT::i32, EncodingImm), 0);
SDValue ShiftAmount = DAG.getTargetConstant(WidthM1 + 1, DL, MVT::i32);
return DAG.getNode(ISD::SHL, DL, MVT::i32, ApertureReg, ShiftAmount);
}
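// With aperture registers, the branch above expands roughly to (shared base
// case, using the OFFSET/WIDTH_M1 constants assumed above):
//   s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 16, 16)
//   s_lshl_b32 s0, s0, 16
// leaving the 32-bit aperture base in s0.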
// For code object version 5, private_base and shared_base are passed through
// implicit kernargs.
if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
ImplicitParameter Param =
(AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE;
return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
if (UserSGPR == AMDGPU::NoRegister) {
// We probably are in a function incorrectly marked with
// amdgpu-no-queue-ptr. This is undefined.
return DAG.getUNDEF(MVT::i32);
}
SDValue QueuePtr = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
// Offset into amd_queue_t for group_segment_aperture_base_hi /
// private_segment_aperture_base_hi.
uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;
SDValue Ptr =
DAG.getObjectPtrOffset(DL, QueuePtr, TypeSize::Fixed(StructOffset));
// TODO: Use custom target PseudoSourceValue.
// TODO: We should use the value from the IR intrinsic call, but it might not
// be available, and it is not clear how we would get it.
MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
return DAG.getLoad(MVT::i32, DL, QueuePtr.getValue(1), Ptr, PtrInfo,
commonAlignment(Align(64), StructOffset),
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
/// Return true if the value is a known valid address, such that a null check is
/// not necessary.
static bool isKnownNonNull(SDValue Val, SelectionDAG &DAG,
const AMDGPUTargetMachine &TM, unsigned AddrSpace) {
if (isa<FrameIndexSDNode>(Val) || isa<GlobalAddressSDNode>(Val) ||
isa<BasicBlockSDNode>(Val))
return true;
if (auto *ConstVal = dyn_cast<ConstantSDNode>(Val))
return ConstVal->getSExtValue() != TM.getNullPointerValue(AddrSpace);
// TODO: Search through arithmetic, handle arguments and loads
// marked nonnull.
return false;
}
SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
SDValue Src = ASC->getOperand(0);
SDValue FlatNullPtr = DAG.getConstant(0, SL, MVT::i64);
unsigned SrcAS = ASC->getSrcAddressSpace();
const AMDGPUTargetMachine &TM =
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
// flat -> local/private
if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
unsigned DestAS = ASC->getDestAddressSpace();
if (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
DestAS == AMDGPUAS::PRIVATE_ADDRESS) {
SDValue Ptr = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
if (isKnownNonNull(Src, DAG, TM, SrcAS))
return Ptr;
unsigned NullVal = TM.getNullPointerValue(DestAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE);
return DAG.getNode(ISD::SELECT, SL, MVT::i32, NonNull, Ptr,
SegmentNullPtr);
}
}
// local/private -> flat
if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) {
if (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
SrcAS == AMDGPUAS::PRIVATE_ADDRESS) {
SDValue Aperture = getSegmentAperture(ASC->getSrcAddressSpace(), SL, DAG);
SDValue CvtPtr =
DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Aperture);
CvtPtr = DAG.getNode(ISD::BITCAST, SL, MVT::i64, CvtPtr);
if (isKnownNonNull(Src, DAG, TM, SrcAS))
return CvtPtr;
unsigned NullVal = TM.getNullPointerValue(SrcAS);
SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32);
SDValue NonNull
= DAG.getSetCC(SL, MVT::i1, Src, SegmentNullPtr, ISD::SETNE);
return DAG.getNode(ISD::SELECT, SL, MVT::i64, NonNull, CvtPtr,
FlatNullPtr);
}
}
if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
Op.getValueType() == MVT::i64) {
const SIMachineFunctionInfo *Info =
DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
if (ASC->getDestAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
Src.getValueType() == MVT::i64)
return DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
// global <-> flat are no-ops and never emitted.
const MachineFunction &MF = DAG.getMachineFunction();
DiagnosticInfoUnsupported InvalidAddrSpaceCast(
MF.getFunction(), "invalid addrspacecast", SL.getDebugLoc());
DAG.getContext()->diagnose(InvalidAddrSpaceCast);
return DAG.getUNDEF(ASC->getValueType(0));
}
// This lowers an INSERT_SUBVECTOR by extracting the individual elements from
// the small vector and inserting them into the big vector. That is better than
// the default expansion of doing it via a stack slot. Even though the use of
// the stack slot would be optimized away afterwards, the stack slot itself
// remains.
SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDValue Ins = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
EVT VecVT = Vec.getValueType();
EVT InsVT = Ins.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned InsNumElts = InsVT.getVectorNumElements();
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDLoc SL(Op);
for (unsigned I = 0; I != InsNumElts; ++I) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Ins,
DAG.getConstant(I, SL, MVT::i32));
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, VecVT, Vec, Elt,
DAG.getConstant(IdxVal + I, SL, MVT::i32));
}
return Vec;
}
SDValue SITargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
SDValue InsVal = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
SDLoc SL(Op);
// Specially handle the case of v4i16 with static indexing.
unsigned NumElts = VecVT.getVectorNumElements();
auto KIdx = dyn_cast<ConstantSDNode>(Idx);
if (NumElts == 4 && EltSize == 16 && KIdx) {
SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Vec);
SDValue LoHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
DAG.getConstant(0, SL, MVT::i32));
SDValue HiHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BCVec,
DAG.getConstant(1, SL, MVT::i32));
SDValue LoVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, LoHalf);
SDValue HiVec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, HiHalf);
unsigned Idx = KIdx->getZExtValue();
bool InsertLo = Idx < 2;
SDValue InsHalf = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, MVT::v2i16,
InsertLo ? LoVec : HiVec,
DAG.getNode(ISD::BITCAST, SL, MVT::i16, InsVal),
DAG.getConstant(InsertLo ? Idx : (Idx - 2), SL, MVT::i32));
InsHalf = DAG.getNode(ISD::BITCAST, SL, MVT::i32, InsHalf);
SDValue Concat = InsertLo ?
DAG.getBuildVector(MVT::v2i32, SL, { InsHalf, HiHalf }) :
DAG.getBuildVector(MVT::v2i32, SL, { LoHalf, InsHalf });
return DAG.getNode(ISD::BITCAST, SL, VecVT, Concat);
}
// Static indexing does not lower to stack access, and hence there is no need
// for special custom lowering to avoid stack access.
if (isa<ConstantSDNode>(Idx))
return SDValue();
// Avoid stack access for dynamic indexing by custom lowering to
// v_bfi_b32 (v_bfm_b32 16, (shl idx, 16)), val, vec
assert(VecSize <= 64 && "Expected target vector size to be <= 64 bits");
MVT IntVT = MVT::getIntegerVT(VecSize);
// Convert vector index to bit-index and get the required bit mask.
assert(isPowerOf2_32(EltSize));
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
SDValue BFM = DAG.getNode(ISD::SHL, SL, IntVT,
DAG.getConstant(0xffff, SL, IntVT),
ScaledIdx);
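// Worked example: inserting into element 2 of a v4i16 gives ScaledIdx =
// 2 << 4 = 32 and BFM = 0xffff << 32, i.e. a mask covering bits [47:32] of
// the 64-bit integer view of the vector.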
// 1. Create a congruent vector with the target value in each element.
SDValue ExtVal = DAG.getNode(ISD::BITCAST, SL, IntVT,
DAG.getSplatBuildVector(VecVT, SL, InsVal));
// 2. Mask off all other indices except the required index within (1).
SDValue LHS = DAG.getNode(ISD::AND, SL, IntVT, BFM, ExtVal);
// 3. Mask off the required index within the target vector.
SDValue BCVec = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
SDValue RHS = DAG.getNode(ISD::AND, SL, IntVT,
DAG.getNOT(SL, BFM, IntVT), BCVec);
// 4. Get (2) and (3) ORed into the target vector.
SDValue BFI = DAG.getNode(ISD::OR, SL, IntVT, LHS, RHS);
return DAG.getNode(ISD::BITCAST, SL, VecVT, BFI);
}
SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT ResultVT = Op.getValueType();
SDValue Vec = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
EVT VecVT = Vec.getValueType();
unsigned VecSize = VecVT.getSizeInBits();
EVT EltVT = VecVT.getVectorElementType();
DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
// Make sure we do any optimizations that will make it easier to fold
// source modifiers before obscuring it with bit operations.
// XXX - Why doesn't this get called when vector_shuffle is expanded?
if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
return Combined;
if (VecSize == 128 || VecSize == 256) {
SDValue Lo, Hi;
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
if (VecSize == 128) {
SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
Lo = DAG.getBitcast(LoVT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(0, SL, MVT::i32)));
Hi = DAG.getBitcast(HiVT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(1, SL, MVT::i32)));
} else {
assert(VecSize == 256);
SDValue V2 = DAG.getBitcast(MVT::v4i64, Vec);
SDValue Parts[4];
for (unsigned P = 0; P < 4; ++P) {
Parts[P] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64, V2,
DAG.getConstant(P, SL, MVT::i32));
}
Lo = DAG.getBitcast(LoVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
Parts[0], Parts[1]));
Hi = DAG.getBitcast(HiVT, DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i64,
Parts[2], Parts[3]));
}
EVT IdxVT = Idx.getValueType();
unsigned NElem = VecVT.getVectorNumElements();
assert(isPowerOf2_32(NElem));
SDValue IdxMask = DAG.getConstant(NElem / 2 - 1, SL, IdxVT);
SDValue NewIdx = DAG.getNode(ISD::AND, SL, IdxVT, Idx, IdxMask);
SDValue Half = DAG.getSelectCC(SL, Idx, IdxMask, Hi, Lo, ISD::SETUGT);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Half, NewIdx);
}
assert(VecSize <= 64);
MVT IntVT = MVT::getIntegerVT(VecSize);
// If Vec is just a SCALAR_TO_VECTOR, then use the scalar integer directly.
SDValue VecBC = peekThroughBitcasts(Vec);
if (VecBC.getOpcode() == ISD::SCALAR_TO_VECTOR) {
SDValue Src = VecBC.getOperand(0);
Src = DAG.getBitcast(Src.getValueType().changeTypeToInteger(), Src);
Vec = DAG.getAnyExtOrTrunc(Src, SL, IntVT);
}
unsigned EltSize = EltVT.getSizeInBits();
assert(isPowerOf2_32(EltSize));
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
// Convert vector index to bit-index (* EltSize)
SDValue ScaledIdx = DAG.getNode(ISD::SHL, SL, MVT::i32, Idx, ScaleFactor);
SDValue BC = DAG.getNode(ISD::BITCAST, SL, IntVT, Vec);
SDValue Elt = DAG.getNode(ISD::SRL, SL, IntVT, BC, ScaledIdx);
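// For example, extracting element 3 of a v4i16 shifts the 64-bit integer
// view right by 3 << 4 = 48 bits; the low 16 bits of Elt then hold the
// requested element.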
if (ResultVT == MVT::f16) {
SDValue Result = DAG.getNode(ISD::TRUNCATE, SL, MVT::i16, Elt);
return DAG.getNode(ISD::BITCAST, SL, ResultVT, Result);
}
return DAG.getAnyExtOrTrunc(Elt, SL, ResultVT);
}
static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) {
assert(Elt % 2 == 0);
return Mask[Elt + 1] == Mask[Elt] + 1 && (Mask[Elt] % 2 == 0);
}
SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT ResultVT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
EVT PackVT = ResultVT.isInteger() ? MVT::v2i16 : MVT::v2f16;
EVT EltVT = PackVT.getVectorElementType();
int SrcNumElts = Op.getOperand(0).getValueType().getVectorNumElements();
// vector_shuffle <0,1,6,7> lhs, rhs
// -> concat_vectors (extract_subvector lhs, 0), (extract_subvector rhs, 2)
//
// vector_shuffle <6,7,2,3> lhs, rhs
// -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 2)
//
// vector_shuffle <6,7,0,1> lhs, rhs
// -> concat_vectors (extract_subvector rhs, 2), (extract_subvector lhs, 0)
// Avoid scalarizing when both halves are reading from consecutive elements.
SmallVector<SDValue, 4> Pieces;
for (int I = 0, N = ResultVT.getVectorNumElements(); I != N; I += 2) {
if (elementPairIsContiguous(SVN->getMask(), I)) {
const int Idx = SVN->getMaskElt(I);
int VecIdx = Idx < SrcNumElts ? 0 : 1;
int EltIdx = Idx < SrcNumElts ? Idx : Idx - SrcNumElts;
SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL,
PackVT, SVN->getOperand(VecIdx),
DAG.getConstant(EltIdx, SL, MVT::i32));
Pieces.push_back(SubVec);
} else {
const int Idx0 = SVN->getMaskElt(I);
const int Idx1 = SVN->getMaskElt(I + 1);
int VecIdx0 = Idx0 < SrcNumElts ? 0 : 1;
int VecIdx1 = Idx1 < SrcNumElts ? 0 : 1;
int EltIdx0 = Idx0 < SrcNumElts ? Idx0 : Idx0 - SrcNumElts;
int EltIdx1 = Idx1 < SrcNumElts ? Idx1 : Idx1 - SrcNumElts;
SDValue Vec0 = SVN->getOperand(VecIdx0);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec0, DAG.getConstant(EltIdx0, SL, MVT::i32));
SDValue Vec1 = SVN->getOperand(VecIdx1);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec1, DAG.getConstant(EltIdx1, SL, MVT::i32));
Pieces.push_back(DAG.getBuildVector(PackVT, SL, { Elt0, Elt1 }));
}
}
return DAG.getNode(ISD::CONCAT_VECTORS, SL, ResultVT, Pieces);
}
SDValue SITargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue SVal = Op.getOperand(0);
EVT ResultVT = Op.getValueType();
EVT SValVT = SVal.getValueType();
SDValue UndefVal = DAG.getUNDEF(SValVT);
SDLoc SL(Op);
SmallVector<SDValue, 8> VElts;
VElts.push_back(SVal);
for (int I = 1, E = ResultVT.getVectorNumElements(); I < E; ++I)
VElts.push_back(UndefVal);
return DAG.getBuildVector(ResultVT, SL, VElts);
}
SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
EVT VT = Op.getValueType();
if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
VT == MVT::v8i16 || VT == MVT::v8f16) {
EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
VT.getVectorNumElements() / 2);
MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits());
// Turn into pair of packed build_vectors.
// TODO: Special case for constants that can be materialized with s_mov_b64.
SmallVector<SDValue, 4> LoOps, HiOps;
for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) {
LoOps.push_back(Op.getOperand(I));
HiOps.push_back(Op.getOperand(I + E));
}
SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
{ CastLo, CastHi });
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
if (VT == MVT::v16i16 || VT == MVT::v16f16) {
EVT QuarterVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
VT.getVectorNumElements() / 4);
MVT QuarterIntVT = MVT::getIntegerVT(QuarterVT.getSizeInBits());
SmallVector<SDValue, 4> Parts[4];
for (unsigned I = 0, E = VT.getVectorNumElements() / 4; I != E; ++I) {
for (unsigned P = 0; P < 4; ++P)
Parts[P].push_back(Op.getOperand(I + P * E));
}
SDValue Casts[4];
for (unsigned P = 0; P < 4; ++P) {
SDValue Vec = DAG.getBuildVector(QuarterVT, SL, Parts[P]);
Casts[P] = DAG.getNode(ISD::BITCAST, SL, QuarterIntVT, Vec);
}
SDValue Blend =
DAG.getBuildVector(MVT::getVectorVT(QuarterIntVT, 4), SL, Casts);
return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
}
assert(VT == MVT::v2f16 || VT == MVT::v2i16);
assert(!Subtarget->hasVOP3PInsts() && "this should be legal");
SDValue Lo = Op.getOperand(0);
SDValue Hi = Op.getOperand(1);
// Avoid adding defined bits with the zero_extend.
if (Hi.isUndef()) {
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
SDValue ExtLo = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Lo);
return DAG.getNode(ISD::BITCAST, SL, VT, ExtLo);
}
Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Hi);
Hi = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Hi);
SDValue ShlHi = DAG.getNode(ISD::SHL, SL, MVT::i32, Hi,
DAG.getConstant(16, SL, MVT::i32));
if (Lo.isUndef())
return DAG.getNode(ISD::BITCAST, SL, VT, ShlHi);
Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Lo);
Lo = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Lo);
SDValue Or = DAG.getNode(ISD::OR, SL, MVT::i32, Lo, ShlHi);
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
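// For example, without VOP3P lowerBUILD_VECTOR emits build_vector (f16 %lo,
// f16 %hi) as (or (zext (bitcast %lo)), (shl (zext (bitcast %hi)), 16)),
// bitcast back to v2f16.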
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
!shouldEmitGOTReloc(GA->getGlobal());
}
static SDValue
buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
const SDLoc &DL, int64_t Offset, EVT PtrVT,
unsigned GAFlags = SIInstrInfo::MO_NONE) {
assert(isInt<32>(Offset + 4) && "32-bit offset is expected!");
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
//
// For constant address space:
// s_getpc_b64 s[0:1]
// s_add_u32 s0, s0, $symbol
// s_addc_u32 s1, s1, 0
//
// s_getpc_b64 returns the address of the s_add_u32 instruction and then
// a fixup or relocation is emitted to replace $symbol with a literal
// constant, which is a pc-relative offset from the encoding of the $symbol
// operand to the global variable.
//
// For global address space:
// s_getpc_b64 s[0:1]
// s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
// s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
//
// s_getpc_b64 returns the address of the s_add_u32 instruction and then
// fixups or relocations are emitted to replace $symbol@*@lo and
// $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
// which is a 64-bit pc-relative offset from the encoding of the $symbol
// operand to the global variable.
//
// What we want here is an offset from the value returned by s_getpc
// (which is the address of the s_add_u32 instruction) to the global
// variable, but since the encoding of $symbol starts 4 bytes after the start
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address. Similarly for the s_addc_u32 instruction, the
// encoding of $symbol starts 12 bytes after the start of the s_add_u32
// instruction.
SDValue PtrLo =
DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4, GAFlags);
SDValue PtrHi;
if (GAFlags == SIInstrInfo::MO_NONE) {
PtrHi = DAG.getTargetConstant(0, DL, MVT::i32);
} else {
PtrHi =
DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 12, GAFlags + 1);
}
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
}
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
SDLoc DL(GSD);
EVT PtrVT = Op.getValueType();
const GlobalValue *GV = GSD->getGlobal();
if ((GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
shouldUseLDSConstAddress(GV)) ||
GSD->getAddressSpace() == AMDGPUAS::REGION_ADDRESS ||
GSD->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
GV->hasExternalLinkage()) {
Type *Ty = GV->getValueType();
// HIP uses an unsized array `extern __shared__ T s[]` or a similar
// zero-sized type in other languages to declare dynamic shared memory
// whose size is not known at compile time. Such arrays are allocated by
// the runtime and placed directly after the statically allocated ones.
// They all share the same offset.
if (DAG.getDataLayout().getTypeAllocSize(Ty).isZero()) {
assert(PtrVT == MVT::i32 && "32-bit pointer is expected.");
// Adjust alignment for that dynamic shared memory array.
MFI->setDynLDSAlign(DAG.getDataLayout(), *cast<GlobalVariable>(GV));
return SDValue(
DAG.getMachineNode(AMDGPU::GET_GROUPSTATICSIZE, DL, PtrVT), 0);
}
}
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
}
if (GSD->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, GSD->getOffset(),
SIInstrInfo::MO_ABS32_LO);
return DAG.getNode(AMDGPUISD::LDS, DL, MVT::i32, GA);
}
if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
else if (shouldEmitPCReloc(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
SIInstrInfo::MO_REL32);
SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
const DataLayout &DataLayout = DAG.getDataLayout();
Align Alignment = DataLayout.getABITypeAlign(PtrTy);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getGOT(DAG.getMachineFunction());
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Alignment,
MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant);
}
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
const SDLoc &DL, SDValue V) const {
// We can't use S_MOV_B32 directly, because there is no way to specify m0 as
// the destination register.
//
// We can't use CopyToReg, because MachineCSE won't combine COPY instructions,
// so we will end up with redundant moves to m0.
//
// We use a pseudo to ensure we emit s_mov_b32 with m0 as the direct result.
// A Null SDValue creates a glue result.
SDNode *M0 = DAG.getMachineNode(AMDGPU::SI_INIT_M0, DL, MVT::Other, MVT::Glue,
V, Chain);
return SDValue(M0, 0);
}
SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
SDValue Op,
MVT VT,
unsigned Offset) const {
SDLoc SL(Op);
SDValue Param = lowerKernargMemParameter(
DAG, MVT::i32, MVT::i32, SL, DAG.getEntryNode(), Offset, Align(4), false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
}
static SDValue emitNonHSAIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
EVT VT) {
DiagnosticInfoUnsupported BadIntrin(DAG.getMachineFunction().getFunction(),
"non-hsa intrinsic with hsa target",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
static SDValue emitRemovedIntrinsicError(SelectionDAG &DAG, const SDLoc &DL,
EVT VT) {
DiagnosticInfoUnsupported BadIntrin(DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
static SDValue getBuildDwordsVector(SelectionDAG &DAG, SDLoc DL,
ArrayRef<SDValue> Elts) {
assert(!Elts.empty());
MVT Type;
unsigned NumElts = Elts.size();
if (NumElts <= 8) {
Type = MVT::getVectorVT(MVT::f32, NumElts);
} else {
assert(Elts.size() <= 16);
Type = MVT::v16f32;
NumElts = 16;
}
SmallVector<SDValue, 16> VecElts(NumElts);
for (unsigned i = 0; i < Elts.size(); ++i) {
SDValue Elt = Elts[i];
if (Elt.getValueType() != MVT::f32)
Elt = DAG.getBitcast(MVT::f32, Elt);
VecElts[i] = Elt;
}
for (unsigned i = Elts.size(); i < NumElts; ++i)
VecElts[i] = DAG.getUNDEF(MVT::f32);
if (NumElts == 1)
return VecElts[0];
return DAG.getBuildVector(Type, DL, VecElts);
}
static SDValue padEltsToUndef(SelectionDAG &DAG, const SDLoc &DL, EVT CastVT,
SDValue Src, int ExtraElts) {
EVT SrcVT = Src.getValueType();
SmallVector<SDValue, 8> Elts;
if (SrcVT.isVector())
DAG.ExtractVectorElements(Src, Elts);
else
Elts.push_back(Src);
SDValue Undef = DAG.getUNDEF(SrcVT.getScalarType());
while (ExtraElts--)
Elts.push_back(Undef);
return DAG.getBuildVector(CastVT, DL, Elts);
}
// Reconstruct the required return value for an image load intrinsic.
// This is more complicated due to the optional use of TexFailCtrl, which
// means the required return type is an aggregate.
static SDValue constructRetValue(SelectionDAG &DAG,
MachineSDNode *Result,
ArrayRef<EVT> ResultTypes,
bool IsTexFail, bool Unpacked, bool IsD16,
int DMaskPop, int NumVDataDwords,
const SDLoc &DL) {
// Determine the required return type. This is the same regardless of the
// IsTexFail flag.
EVT ReqRetVT = ResultTypes[0];
int ReqRetNumElts = ReqRetVT.isVector() ? ReqRetVT.getVectorNumElements() : 1;
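// With packed D16 two half elements share one dword, so halve the counts;
// unpacked D16 and non-D16 results use one dword per element.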
int NumDataDwords = (!IsD16 || Unpacked) ?
ReqRetNumElts : (ReqRetNumElts + 1) / 2;
int MaskPopDwords = (!IsD16 || Unpacked) ?
DMaskPop : (DMaskPop + 1) / 2;
MVT DataDwordVT = NumDataDwords == 1 ?
MVT::i32 : MVT::getVectorVT(MVT::i32, NumDataDwords);
MVT MaskPopVT = MaskPopDwords == 1 ?
MVT::i32 : MVT::getVectorVT(MVT::i32, MaskPopDwords);
SDValue Data(Result, 0);
SDValue TexFail;
if (DMaskPop > 0 && Data.getValueType() != MaskPopVT) {
SDValue ZeroIdx = DAG.getConstant(0, DL, MVT::i32);
if (MaskPopVT.isVector()) {
Data = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MaskPopVT,
SDValue(Result, 0), ZeroIdx);
} else {
Data = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MaskPopVT,
SDValue(Result, 0), ZeroIdx);
}
}
if (DataDwordVT.isVector())
Data = padEltsToUndef(DAG, DL, DataDwordVT, Data,
NumDataDwords - MaskPopDwords);
if (IsD16)
Data = adjustLoadValueTypeImpl(Data, ReqRetVT, DL, DAG, Unpacked);
EVT LegalReqRetVT = ReqRetVT;
if (!ReqRetVT.isVector()) {
if (!Data.getValueType().isInteger())
Data = DAG.getNode(ISD::BITCAST, DL,
Data.getValueType().changeTypeToInteger(), Data);
Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data);
} else {
// We need to widen the return vector to a legal type
if ((ReqRetVT.getVectorNumElements() % 2) == 1 &&
ReqRetVT.getVectorElementType().getSizeInBits() == 16) {
LegalReqRetVT =
EVT::getVectorVT(*DAG.getContext(), ReqRetVT.getVectorElementType(),
ReqRetVT.getVectorNumElements() + 1);
}
}
Data = DAG.getNode(ISD::BITCAST, DL, LegalReqRetVT, Data);
if (IsTexFail) {
TexFail =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SDValue(Result, 0),
DAG.getConstant(MaskPopDwords, DL, MVT::i32));
return DAG.getMergeValues({Data, TexFail, SDValue(Result, 1)}, DL);
}
if (Result->getNumValues() == 1)
return Data;
return DAG.getMergeValues({Data, SDValue(Result, 1)}, DL);
}
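// Decode the TexFailCtrl immediate: bit 0 enables TFE and bit 1 enables
// LWE. Returns false if any unknown bits are set.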
static bool parseTexFail(SDValue TexFailCtrl, SelectionDAG &DAG, SDValue *TFE,
SDValue *LWE, bool &IsTexFail) {
auto TexFailCtrlConst = cast<ConstantSDNode>(TexFailCtrl.getNode());
uint64_t Value = TexFailCtrlConst->getZExtValue();
if (Value) {
IsTexFail = true;
}
SDLoc DL(TexFailCtrlConst);
*TFE = DAG.getTargetConstant((Value & 0x1) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x1;
*LWE = DAG.getTargetConstant((Value & 0x2) ? 1 : 0, DL, MVT::i32);
Value &= ~(uint64_t)0x2;
return Value == 0;
}
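// Pack a run of 16-bit image address operands into dwords, pairing adjacent
// operands into v2 vectors and any-extending operands that must remain
// unpaired (the final operand and odd gradient components).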
static void packImage16bitOpsToDwords(SelectionDAG &DAG, SDValue Op,
MVT PackVectorVT,
SmallVectorImpl<SDValue> &PackedAddrs,
unsigned DimIdx, unsigned EndIdx,
unsigned NumGradients) {
SDLoc DL(Op);
for (unsigned I = DimIdx; I < EndIdx; I++) {
SDValue Addr = Op.getOperand(I);
// Gradients are packed with undef for each coordinate.
// In <hi 16 bit>,<lo 16 bit> notation, the registers look like this:
// 1D: undef,dx/dh; undef,dx/dv
// 2D: dy/dh,dx/dh; dy/dv,dx/dv
// 3D: dy/dh,dx/dh; undef,dz/dh; dy/dv,dx/dv; undef,dz/dv
if (((I + 1) >= EndIdx) ||
((NumGradients / 2) % 2 == 1 && (I == DimIdx + (NumGradients / 2) - 1 ||
I == DimIdx + NumGradients - 1))) {
if (Addr.getValueType() != MVT::i16)
Addr = DAG.getBitcast(MVT::i16, Addr);
Addr = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Addr);
} else {
Addr = DAG.getBuildVector(PackVectorVT, DL, {Addr, Op.getOperand(I + 1)});
I++;
}
Addr = DAG.getBitcast(MVT::f32, Addr);
PackedAddrs.push_back(Addr);
}
}
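// Lower an image (MIMG) intrinsic: work out the dmask and vdata dword
// count, pack 16-bit address and gradient operands, choose between NSA and
// contiguous addressing, select the per-generation MIMG opcode, and finally
// reconstruct the expected aggregate return value.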
SDValue SITargetLowering::lowerImage(SDValue Op,
const AMDGPU::ImageDimIntrinsicInfo *Intr,
SelectionDAG &DAG, bool WithChain) const {
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget* ST = &MF.getSubtarget<GCNSubtarget>();
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
bool IsD16 = false;
bool IsG16 = false;
bool IsA16 = false;
SDValue VData;
int NumVDataDwords;
bool AdjustRetType = false;
// Offset of intrinsic arguments
const unsigned ArgOffset = WithChain ? 2 : 1;
unsigned DMask;
unsigned DMaskLanes = 0;
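// Image atomics carry no explicit dmask operand; derive the dmask and the
// number of vdata dwords from the width of the data operand instead.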
if (BaseOpcode->Atomic) {
VData = Op.getOperand(2);
bool Is64Bit = VData.getValueType() == MVT::i64;
if (BaseOpcode->AtomicX2) {
SDValue VData2 = Op.getOperand(3);
VData = DAG.getBuildVector(Is64Bit ? MVT::v2i64 : MVT::v2i32, DL,
{VData, VData2});
if (Is64Bit)
VData = DAG.getBitcast(MVT::v4i32, VData);
ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
DMask = Is64Bit ? 0xf : 0x3;
NumVDataDwords = Is64Bit ? 4 : 2;
} else {
DMask = Is64Bit ? 0x3 : 0x1;
NumVDataDwords = Is64Bit ? 2 : 1;
}
} else {
auto *DMaskConst =
cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->DMaskIndex));
DMask = DMaskConst->getZExtValue();
DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask);
if (BaseOpcode->Store) {
VData = Op.getOperand(2);
MVT StoreVT = VData.getSimpleValueType();
if (StoreVT.getScalarType() == MVT::f16) {
if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
VData = handleD16VData(VData, DAG, true);
}
NumVDataDwords = (VData.getValueType().getSizeInBits() + 31) / 32;
} else {
// Work out the number of dwords based on the dmask popcount, the underlying
// type, and whether packing is supported.
MVT LoadVT = ResultTypes[0].getSimpleVT();
if (LoadVT.getScalarType() == MVT::f16) {
if (!Subtarget->hasD16Images() || !BaseOpcode->HasD16)
return Op; // D16 is unsupported for this instruction
IsD16 = true;
}
// Confirm that the return type is large enough for the dmask specified
if ((LoadVT.isVector() && LoadVT.getVectorNumElements() < DMaskLanes) ||
(!LoadVT.isVector() && DMaskLanes > 1))
return Op;
// The sq block of gfx8 and gfx9 does not estimate register use correctly
// for d16 image_gather4, image_gather4_l, and image_gather4_lz
// instructions.
if (IsD16 && !Subtarget->hasUnpackedD16VMem() &&
!(BaseOpcode->Gather4 && Subtarget->hasImageGather4D16Bug()))
NumVDataDwords = (DMaskLanes + 1) / 2;
else
NumVDataDwords = DMaskLanes;
AdjustRetType = true;
}
}
unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
SmallVector<SDValue, 4> VAddrs;
// Check for 16-bit addresses or derivatives and pack them if so.
MVT VAddrVT =
Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
MVT VAddrScalarVT = VAddrVT.getScalarType();
MVT GradPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType();
VAddrScalarVT = VAddrVT.getScalarType();
MVT AddrPackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16;
IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16;
// Push back extra arguments.
for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
// Special handling of bias when A16 is on. Bias is of type half but
// occupies a full 32-bit dword.
SDValue Bias = DAG.getBuildVector(
MVT::v2f16, DL,
{Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
VAddrs.push_back(Bias);
} else {
assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) &&
"Bias needs to be converted to 16 bit in A16 mode");
VAddrs.push_back(Op.getOperand(ArgOffset + I));
}
}
if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {
// 16-bit gradients are supported, but they are tied to the A16 control,
// so both gradients and addresses must be 16-bit.
LLVM_DEBUG(
dbgs() << "Failed to lower image intrinsic: 16 bit addresses "
"require 16 bit args for both gradients and addresses\n");
return Op;
}
if (IsA16) {
if (!ST->hasA16()) {
LLVM_DEBUG(dbgs() << "Failed to lower image intrinsic: Target does not "
"support 16 bit addresses\n");
return Op;
}
}
// We've dealt with incorrect input, so we know that if IsA16 or IsG16
// is set then we have to compress/pack operands (either addresses,
// gradients, or both).
// In the case where A16 and gradients are tied (no G16 support), we have
// already verified that both IsA16 and IsG16 are true.
if (BaseOpcode->Gradients && IsG16 && ST->hasG16()) {
// Activate g16
const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
IntrOpcode = G16MappingInfo->G16; // set new opcode to variant with _g16
}
// Add gradients (packed or unpacked)
if (IsG16) {
// Pack the gradients
packImage16bitOpsToDwords(DAG, Op, GradPackVectorVT, VAddrs,
ArgOffset + Intr->GradientStart,
ArgOffset + Intr->CoordStart, Intr->NumGradients);
} else {
for (unsigned I = ArgOffset + Intr->GradientStart;
I < ArgOffset + Intr->CoordStart; I++)
VAddrs.push_back(Op.getOperand(I));
}
// Add addresses (packed or unpacked)
if (IsA16) {
packImage16bitOpsToDwords(DAG, Op, AddrPackVectorVT, VAddrs,
ArgOffset + Intr->CoordStart, VAddrEnd,
0 /* No gradients */);
} else {
// Add uncompressed address
for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++)
VAddrs.push_back(Op.getOperand(I));
}
// If the register allocator cannot place the address registers contiguously
// without introducing moves, then using the non-sequential address encoding
// is always preferable, since it saves VALU instructions and is usually
// neutral or better in terms of code size.
//
// However, we currently have no way of hinting to the register allocator that
// MIMG addresses should be placed contiguously when it is possible to do so,
// so force non-NSA for the common 2-address case as a heuristic.
//
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
// TODO: we can actually allow partial NSA where the final register is a
// contiguous set of the remaining addresses.
// This could help where there are more addresses than supported.
bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
VAddrs.size() >= 3 &&
VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
SDValue VAddr;
if (!UseNSA)
VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
SDValue Unorm;
if (!BaseOpcode->Sampler) {
Unorm = True;
} else {
auto UnormConst =
cast<ConstantSDNode>(Op.getOperand(ArgOffset + Intr->UnormIndex));
Unorm = UnormConst->getZExtValue() ? True : False;
}
SDValue TFE;
SDValue LWE;
SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex);
bool IsTexFail = false;
if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail))
return Op;
if (IsTexFail) {
if (!DMaskLanes) {
// Expect an error flag since TFC is on and dmask is 0. Force dmask to be
// at least 1; otherwise the instruction will fail.
DMask = 0x1;
DMaskLanes = 1;
NumVDataDwords = 1;
}
NumVDataDwords += 1;
AdjustRetType = true;
}
// Something earlier tagged the return type as needing adjustment. This
// happens if the instruction is a load or has TexFailCtrl flags set.
if (AdjustRetType) {
// NumVDataDwords reflects the true number of dwords required in the return type
if (DMaskLanes == 0 && !BaseOpcode->Store) {
// This is a no-op load. It can be eliminated.
SDValue Undef = DAG.getUNDEF(Op.getValueType());
if (isa<MemSDNode>(Op))
return DAG.getMergeValues({Undef, Op.getOperand(0)}, DL);
return Undef;
}
EVT NewVT = NumVDataDwords > 1 ?
EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumVDataDwords)
: MVT::i32;
ResultTypes[0] = NewVT;
if (ResultTypes.size() == 3) {
// The original result was an aggregate type used for TexFailCtrl results.
// The actual instruction returns a vector type, which has now been
// created. Remove the aggregate result.
ResultTypes.erase(&ResultTypes[1]);
}
}
unsigned CPol = cast<ConstantSDNode>(
Op.getOperand(ArgOffset + Intr->CachePolicyIndex))->getZExtValue();
if (BaseOpcode->Atomic)
CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
if (CPol & ~AMDGPU::CPol::ALL)
return Op;
SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
if (UseNSA)
append_range(Ops, VAddrs);
else
Ops.push_back(VAddr);
Ops.push_back(Op.getOperand(ArgOffset + Intr->RsrcIndex));
if (BaseOpcode->Sampler)
Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex));
Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32));
if (IsGFX10Plus)
Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32));
Ops.push_back(Unorm);
Ops.push_back(DAG.getTargetConstant(CPol, DL, MVT::i32));
Ops.push_back(IsA16 && // r128, a16 for gfx9
ST->hasFeature(AMDGPU::FeatureR128A16) ? True : False);
if (IsGFX10Plus)
Ops.push_back(IsA16 ? True : False);
if (!Subtarget->hasGFX90AInsts()) {
Ops.push_back(TFE); //tfe
} else if (cast<ConstantSDNode>(TFE)->getZExtValue()) {
report_fatal_error("TFE is not supported on this GPU");
}
Ops.push_back(LWE); // lwe
if (!IsGFX10Plus)
Ops.push_back(DimInfo->DA ? True : False);
if (BaseOpcode->HasD16)
Ops.push_back(IsD16 ? True : False);
if (isa<MemSDNode>(Op))
Ops.push_back(Op.getOperand(0)); // chain
int NumVAddrDwords =
UseNSA ? VAddrs.size() : VAddr.getValueType().getSizeInBits() / 32;
int Opcode = -1;
if (IsGFX11Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx11Default,
NumVDataDwords, NumVAddrDwords);
} else if (IsGFX10Plus) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode,
UseNSA ? AMDGPU::MIMGEncGfx10NSA
: AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, NumVAddrDwords);
} else {
if (Subtarget->hasGFX90AInsts()) {
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
report_fatal_error(
"requested image instruction is not supported on this GPU");
}
if (Opcode == -1 &&
Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx8,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
}
assert(Opcode != -1);
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
MachineMemOperand *MemRef = MemOp->getMemOperand();
DAG.setNodeMemRefs(NewNode, {MemRef});
}
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
}
if (BaseOpcode->Store)
return SDValue(NewNode, 0);
return constructRetValue(DAG, NewNode,
OrigResultTypes, IsTexFail,
Subtarget->hasUnpackedD16VMem(), IsD16,
DMaskLanes, NumVDataDwords, DL);
}
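// Lower an s_buffer_load intrinsic. A uniform offset selects an SMEM load
// (widening vec3 results to vec4); a divergent offset falls back to one or
// more MUBUF loads of up to four dwords each, assuming an unswizzled buffer.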
SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDValue Offset, SDValue CachePolicy,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
const DataLayout &DataLayout = DAG.getDataLayout();
Align Alignment =
DataLayout.getABITypeAlign(VT.getTypeForEVT(*DAG.getContext()));
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo(),
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
MachineMemOperand::MOInvariant,
VT.getStoreSize(), Alignment);
if (!Offset->isDivergent()) {
SDValue Ops[] = {
Rsrc,
Offset, // Offset
CachePolicy
};
// Widen vec3 load to vec4.
if (VT.isVector() && VT.getVectorNumElements() == 3) {
EVT WidenedVT =
EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
auto WidenedOp = DAG.getMemIntrinsicNode(
AMDGPUISD::SBUFFER_LOAD, DL, DAG.getVTList(WidenedVT), Ops, WidenedVT,
MF.getMachineMemOperand(MMO, 0, WidenedVT.getStoreSize()));
auto Subvector = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, WidenedOp,
DAG.getVectorIdxConstant(0, DL));
return Subvector;
}
return DAG.getMemIntrinsicNode(AMDGPUISD::SBUFFER_LOAD, DL,
DAG.getVTList(VT), Ops, VT, MMO);
}
// We have a divergent offset. Emit a MUBUF buffer load instead. We can
// assume that the buffer is unswizzled.
SmallVector<SDValue, 4> Loads;
unsigned NumLoads = 1;
MVT LoadVT = VT.getSimpleVT();
unsigned NumElts = LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1;
assert((LoadVT.getScalarType() == MVT::i32 ||
LoadVT.getScalarType() == MVT::f32));
if (NumElts == 8 || NumElts == 16) {
NumLoads = NumElts / 4;
LoadVT = MVT::getVectorVT(LoadVT.getScalarType(), 4);
}
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
SDValue Ops[] = {
DAG.getEntryNode(), // Chain
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
{}, // voffset
{}, // soffset
{}, // offset
CachePolicy, // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
// Use the alignment to ensure that the required offsets will fit into the
// immediate offsets.
setBufferOffsets(Offset, DAG, &Ops[3],
NumLoads > 1 ? Align(16 * NumLoads) : Align(4));
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
Loads.push_back(getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList, Ops,
LoadVT, MMO, DAG));
}
if (NumElts == 8 || NumElts == 16)
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Loads);
return Loads[0];
}
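// Lower a workitem ID query. If the dimension is known to be degenerate the
// result folds to the constant 0; otherwise load the input VGPR and, for
// unpacked IDs, attach an AssertZext with the known value range.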
SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
unsigned Dim,
const ArgDescriptor &Arg) const {
SDLoc SL(Op);
MachineFunction &MF = DAG.getMachineFunction();
unsigned MaxID = Subtarget->getMaxWorkitemID(MF.getFunction(), Dim);
if (MaxID == 0)
return DAG.getConstant(0, SL, MVT::i32);
SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), Arg);
// Don't bother inserting AssertZext for packed IDs since we're emitting the
// masking operations anyway.
//
// TODO: We could assert the top bit is 0 for the source copy.
if (Arg.isMasked())
return Val;
// Preserve the known bits after expansion to a copy.
EVT SmallVT =
EVT::getIntegerVT(*DAG.getContext(), 32 - countLeadingZeros(MaxID));
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Val,
DAG.getValueType(SmallVT));
}
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
auto MFI = MF.getInfo<SIMachineFunctionInfo>();
EVT VT = Op.getValueType();
SDLoc DL(Op);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
case Intrinsic::amdgcn_implicit_buffer_ptr: {
if (getSubtarget()->isAmdHsaOrMesa(MF.getFunction()))
return emitNonHSAIntrinsicError(DAG, DL, VT);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
}
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
if (!Subtarget->isAmdHsaOrMesa(MF.getFunction())) {
DiagnosticInfoUnsupported BadIntrin(
MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
return DAG.getUNDEF(VT);
}
auto RegID = IntrinsicID == Intrinsic::amdgcn_dispatch_ptr ?
AMDGPUFunctionArgInfo::DISPATCH_PTR : AMDGPUFunctionArgInfo::QUEUE_PTR;
return getPreloadedValue(DAG, *MFI, VT, RegID);
}
case Intrinsic::amdgcn_implicitarg_ptr: {
if (MFI->isEntryFunction())
return getImplicitArgPtr(DAG, DL);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
}
case Intrinsic::amdgcn_kernarg_segment_ptr: {
if (!AMDGPU::isKernel(MF.getFunction().getCallingConv())) {
// This only makes sense to call in a kernel, so just lower to null.
return DAG.getConstant(0, DL, VT);
}
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
}
case Intrinsic::amdgcn_dispatch_id: {
return getPreloadedValue(DAG, *MFI, VT, AMDGPUFunctionArgInfo::DISPATCH_ID);
}
case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return SDValue();
case Intrinsic::amdgcn_rcp_legacy:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return emitRemovedIntrinsicError(DAG, DL, VT);
return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq_clamp: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
Type *Type = VT.getTypeForEVT(*DAG.getContext());
APFloat Max = APFloat::getLargest(Type->getFltSemantics());
APFloat Min = APFloat::getLargest(Type->getFltSemantics(), true);
SDValue Rsq = DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
SDValue Tmp = DAG.getNode(ISD::FMINNUM, DL, VT, Rsq,
DAG.getConstantFP(Max, DL, VT));
return DAG.getNode(ISD::FMAXNUM, DL, VT, Tmp,
DAG.getConstantFP(Min, DL, VT));
}
case Intrinsic::r600_read_ngroups_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, Align(4),
false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Y, Align(4),
false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Z, Align(4),
false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_X,
Align(4), false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Y,
Align(4), false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z,
Align(4), false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_X);
case Intrinsic::r600_read_local_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Y);
case Intrinsic::r600_read_local_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
case Intrinsic::amdgcn_workgroup_id_x:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
case Intrinsic::amdgcn_workgroup_id_y:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
case Intrinsic::amdgcn_workgroup_id_z:
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
case Intrinsic::amdgcn_lds_kernel_id: {
if (MFI->isEntryFunction())
return getLDSKernelId(DAG, DL);
return getPreloadedValue(DAG, *MFI, VT,
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
}
case Intrinsic::amdgcn_workitem_id_x:
return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
case Intrinsic::amdgcn_wavefrontsize:
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
case Intrinsic::amdgcn_s_buffer_load: {
unsigned CPol = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
if (CPol & ~AMDGPU::CPol::ALL)
return Op;
return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
DAG);
}
case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
case Intrinsic::amdgcn_sin:
return DAG.getNode(AMDGPUISD::SIN_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_cos:
return DAG.getNode(AMDGPUISD::COS_HW, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_mul_u24:
return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT, Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_mul_i24:
return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT, Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_log_clamp: {
if (Subtarget->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
return SDValue();
return emitRemovedIntrinsicError(DAG, DL, VT);
}
case Intrinsic::amdgcn_ldexp:
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_fract:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_div_fmas:
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
case Intrinsic::amdgcn_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_div_scale: {
const ConstantSDNode *Param = cast<ConstantSDNode>(Op.getOperand(3));
// Translate to the operands expected by the machine instruction. The
// first source operand must match one of the other source operands; the
// constant third intrinsic argument selects whether that is the numerator
// or the denominator.
SDValue Numerator = Op.getOperand(1);
SDValue Denominator = Op.getOperand(2);
// Note this operand order is the opposite of the machine instruction's,
// which is s0.f = Quotient, s1.f = Denominator, s2.f = Numerator. The
// intrinsic has the numerator as the first operand to match a normal
// division operation.
SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
// There is a Pat that handles this variant, so return it as-is.
if (Op.getOperand(1).getValueType() == MVT::i1 &&
Op.getConstantOperandVal(2) == 0 &&
Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
return Op;
return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
return lowerFCMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_ballot:
return lowerBALLOTIntrinsic(*this, Op.getNode(), DAG);
case Intrinsic::amdgcn_fmed3:
return DAG.getNode(AMDGPUISD::FMED3, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_fdot2:
return DAG.getNode(AMDGPUISD::FDOT2, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
case Intrinsic::amdgcn_fmul_legacy:
return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_sffbh:
return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_sbfe:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_ubfe:
return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cvt_pknorm_i16:
case Intrinsic::amdgcn_cvt_pknorm_u16:
case Intrinsic::amdgcn_cvt_pk_i16:
case Intrinsic::amdgcn_cvt_pk_u16: {
// FIXME: Stop adding cast if v2f16/v2i16 are legal.
EVT VT = Op.getValueType();
unsigned Opcode;
if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
Opcode = AMDGPUISD::CVT_PKRTZ_F16_F32;
else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
Opcode = AMDGPUISD::CVT_PK_I16_I32;
else
Opcode = AMDGPUISD::CVT_PK_U16_U32;
if (isTypeLegal(VT))
return DAG.getNode(Opcode, DL, VT, Op.getOperand(1), Op.getOperand(2));
SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
}
case Intrinsic::amdgcn_fmad_ftz:
return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_if_break:
return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
Op->getOperand(1), Op->getOperand(2)), 0);
case Intrinsic::amdgcn_groupstaticsize: {
Triple::OSType OS = getTargetMachine().getTargetTriple().getOS();
if (OS == Triple::AMDHSA || OS == Triple::AMDPAL)
return Op;
const Module *M = MF.getFunction().getParent();
const GlobalValue *GV =
M->getNamedValue(Intrinsic::getName(Intrinsic::amdgcn_groupstaticsize));
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private: {
SDLoc SL(Op);
unsigned AS = (IntrinsicID == Intrinsic::amdgcn_is_shared) ?
AMDGPUAS::LOCAL_ADDRESS : AMDGPUAS::PRIVATE_ADDRESS;
SDValue Aperture = getSegmentAperture(AS, SL, DAG);
SDValue SrcVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32,
Op.getOperand(1));
SDValue SrcHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, SrcVec,
DAG.getConstant(1, SL, MVT::i32));
return DAG.getSetCC(SL, MVT::i1, SrcHi, Aperture, ISD::SETEQ);
}
case Intrinsic::amdgcn_perm:
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_reloc_constant: {
Module *M = const_cast<Module *>(MF.getFunction().getParent());
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
auto SymbolName = cast<MDString>(Metadata->getOperand(0))->getString();
auto RelocSymbol = cast<GlobalVariable>(
M->getOrInsertGlobal(SymbolName, Type::getInt32Ty(M->getContext())));
SDValue GA = DAG.getTargetGlobalAddress(RelocSymbol, DL, MVT::i32, 0,
SIInstrInfo::MO_ABS32_LO);
return {DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, GA), 0};
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG, false);
return Op;
}
}
/// Update \p MMO based on the offset inputs to an intrinsic.
static void updateBufferMMO(MachineMemOperand *MMO, SDValue VOffset,
SDValue SOffset, SDValue Offset,
SDValue VIndex = SDValue()) {
if (!isa<ConstantSDNode>(VOffset) || !isa<ConstantSDNode>(SOffset) ||
!isa<ConstantSDNode>(Offset)) {
// The combined offset is not known to be constant, so we cannot represent
// it in the MMO. Give up.
MMO->setValue((Value *)nullptr);
return;
}
if (VIndex && (!isa<ConstantSDNode>(VIndex) ||
!cast<ConstantSDNode>(VIndex)->isZero())) {
// The strided index component of the address is not known to be zero, so we
// cannot represent it in the MMO. Give up.
MMO->setValue((Value *)nullptr);
return;
}
MMO->setOffset(cast<ConstantSDNode>(VOffset)->getSExtValue() +
cast<ConstantSDNode>(SOffset)->getSExtValue() +
cast<ConstantSDNode>(Offset)->getSExtValue());
}
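// Lower a raw buffer atomic intrinsic to the given AMDGPUISD buffer atomic
// node. Raw variants have no vindex, so vindex is 0 and idxen is clear.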
SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
SelectionDAG &DAG,
unsigned NewOpcode) const {
SDLoc DL(Op);
SDValue VData = Op.getOperand(2);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
VData, // vdata
Op.getOperand(3), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]);
EVT MemVT = VData.getValueType();
return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
M->getMemOperand());
}
// Return a value to use for the idxen operand by examining the vindex operand.
static unsigned getIdxEn(SDValue VIndex) {
if (auto VIndexC = dyn_cast<ConstantSDNode>(VIndex))
// No need to set idxen if vindex is known to be zero.
return VIndexC->getZExtValue() != 0;
return 1;
}
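// Lower a struct buffer atomic intrinsic. Struct variants carry an explicit
// vindex operand, so idxen is always set.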
SDValue
SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
unsigned NewOpcode) const {
SDLoc DL(Op);
SDValue VData = Op.getOperand(2);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
EVT MemVT = VData.getValueType();
return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
M->getMemOperand());
}
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
SDLoc DL(Op);
switch (IntrID) {
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Chain = M->getOperand(0);
SDValue M0 = M->getOperand(2);
SDValue Value = M->getOperand(3);
unsigned IndexOperand = M->getConstantOperandVal(7);
unsigned WaveRelease = M->getConstantOperandVal(8);
unsigned WaveDone = M->getConstantOperandVal(9);
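// The index operand packs several fields: bits [5:0] hold the ordered count
// index and, on gfx10+, bits [27:24] hold the dword count. Any other set
// bits are rejected below.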
unsigned OrderedCountIndex = IndexOperand & 0x3f;
IndexOperand &= ~0x3f;
unsigned CountDw = 0;
if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10) {
CountDw = (IndexOperand >> 24) & 0xf;
IndexOperand &= ~(0xf << 24);
if (CountDw < 1 || CountDw > 4) {
report_fatal_error(
"ds_ordered_count: dword count must be between 1 and 4");
}
}
if (IndexOperand)
report_fatal_error("ds_ordered_count: bad index operand");
if (WaveDone && !WaveRelease)
report_fatal_error("ds_ordered_count: wave_done requires wave_release");
unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
unsigned ShaderType =
SIInstrInfo::getDSShaderTypeValue(DAG.getMachineFunction());
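// Pack the configuration into the 16-bit DS offset: Offset0 (low byte)
// holds the ordered count index scaled to a byte offset; Offset1 (high
// byte) holds wave_release (bit 0), wave_done (bit 1), the shader type
// (pre-gfx11), the op, and the dword count minus one (gfx10+).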
unsigned Offset0 = OrderedCountIndex << 2;
unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
if (Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10)
Offset1 |= (CountDw - 1) << 6;
if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX11)
Offset1 |= ShaderType << 2;
unsigned Offset = Offset0 | (Offset1 << 8);
SDValue Ops[] = {
Chain,
Value,
DAG.getTargetConstant(Offset, DL, MVT::i16),
copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
};
return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
case Intrinsic::amdgcn_ds_fadd: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc = ISD::ATOMIC_LOAD_FADD;
return DAG.getAtomic(Opc, SDLoc(Op), M->getMemoryVT(),
M->getOperand(0), M->getOperand(2), M->getOperand(3),
M->getMemOperand());
}
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc;
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
Opc = AMDGPUISD::ATOMIC_INC;
break;
case Intrinsic::amdgcn_atomic_dec:
Opc = AMDGPUISD::ATOMIC_DEC;
break;
case Intrinsic::amdgcn_ds_fmin:
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
case Intrinsic::amdgcn_ds_fmax:
Opc = AMDGPUISD::ATOMIC_LOAD_FMAX;
break;
default:
llvm_unreachable("Unknown intrinsic!");
}
SDValue Ops[] = {
M->getOperand(0), // Chain
M->getOperand(2), // Ptr
M->getOperand(3) // Value
};
return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
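// Legacy buffer intrinsics take a single combined offset operand;
// setBufferOffsets splits it into the voffset/soffset/immediate fields
// below.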
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format: {
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]);
EVT LoadVT = Op.getValueType();
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::BUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
// Handle BUFFER_LOAD_BYTE/UBYTE/SHORT/USHORT overloaded intrinsics
if (LoadVT.getScalarType() == MVT::i8 ||
LoadVT.getScalarType() == MVT::i16)
return handleByteShortBufferLoads(DAG, LoadVT, DL, Ops, M);
return getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT,
M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format: {
const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format;
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5]);
return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format: {
const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format;
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]);
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(3));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
case Intrinsic::amdgcn_raw_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // format
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
case Intrinsic::amdgcn_struct_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // rsrc
Op.getOperand(3), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
return adjustLoadValueType(AMDGPUISD::TBUFFER_LOAD_FORMAT_D16,
M, DAG, Ops);
return getMemIntrinsicNode(AMDGPUISD::TBUFFER_LOAD_FORMAT, DL,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
case Intrinsic::amdgcn_buffer_atomic_swap:
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_buffer_atomic_csub:
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_buffer_atomic_fadd: {
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
unsigned Opcode = 0;
switch (IntrID) {
case Intrinsic::amdgcn_buffer_atomic_swap:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SWAP;
break;
case Intrinsic::amdgcn_buffer_atomic_add:
Opcode = AMDGPUISD::BUFFER_ATOMIC_ADD;
break;
case Intrinsic::amdgcn_buffer_atomic_sub:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SUB;
break;
case Intrinsic::amdgcn_buffer_atomic_csub:
Opcode = AMDGPUISD::BUFFER_ATOMIC_CSUB;
break;
case Intrinsic::amdgcn_buffer_atomic_smin:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMIN;
break;
case Intrinsic::amdgcn_buffer_atomic_umin:
Opcode = AMDGPUISD::BUFFER_ATOMIC_UMIN;
break;
case Intrinsic::amdgcn_buffer_atomic_smax:
Opcode = AMDGPUISD::BUFFER_ATOMIC_SMAX;
break;
case Intrinsic::amdgcn_buffer_atomic_umax:
Opcode = AMDGPUISD::BUFFER_ATOMIC_UMAX;
break;
case Intrinsic::amdgcn_buffer_atomic_and:
Opcode = AMDGPUISD::BUFFER_ATOMIC_AND;
break;
case Intrinsic::amdgcn_buffer_atomic_or:
Opcode = AMDGPUISD::BUFFER_ATOMIC_OR;
break;
case Intrinsic::amdgcn_buffer_atomic_xor:
Opcode = AMDGPUISD::BUFFER_ATOMIC_XOR;
break;
case Intrinsic::amdgcn_buffer_atomic_fadd:
if (!Op.getValue(0).use_empty() && !hasAtomicFaddRtnForTy(Op)) {
DiagnosticInfoUnsupported
NoFpRet(DAG.getMachineFunction().getFunction(),
"return versions of fp atomics not supported",
DL.getDebugLoc(), DS_Error);
DAG.getContext()->diagnose(NoFpRet);
return SDValue();
}
Opcode = AMDGPUISD::BUFFER_ATOMIC_FADD;
break;
default:
llvm_unreachable("unhandled atomic opcode");
}
return DAG.getMemIntrinsicNode(Opcode, DL, Op->getVTList(), Ops, VT,
M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_raw_buffer_atomic_add:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_and:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_raw_buffer_atomic_or:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_struct_buffer_atomic_add:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_and:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_struct_buffer_atomic_or:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(5));
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: {
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: {
auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
SDValue Ops[] = {
Op.getOperand(0), // Chain
Op.getOperand(2), // src
Op.getOperand(3), // cmp
Op.getOperand(4), // rsrc
Op.getOperand(5), // vindex
Offsets.first, // voffset
Op.getOperand(7), // soffset
Offsets.second, // offset
Op.getOperand(8), // cachepolicy
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
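// Lower amdgcn.image.bvh.intersect.ray to an IMAGE_BVH*_INTERSECT_RAY MIMG
// instruction. The address layout depends on the node pointer width
// (i32 vs. i64), on whether the ray direction is f16 (A16), and on whether
// the NSA encoding is available.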
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
SDValue RayOrigin = M->getOperand(4);
SDValue RayDir = M->getOperand(5);
SDValue RayInvDir = M->getOperand(6);
SDValue TDescr = M->getOperand(7);
assert(NodePtr.getValueType() == MVT::i32 ||
NodePtr.getValueType() == MVT::i64);
assert(RayDir.getValueType() == MVT::v3f16 ||
RayDir.getValueType() == MVT::v3f32);
if (!Subtarget->hasGFX10_AEncoding()) {
emitRemovedIntrinsicError(DAG, DL, Op.getValueType());
return SDValue();
}
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
const bool Is64 = NodePtr.getValueType() == MVT::i64;
const unsigned NumVDataDwords = 4;
const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
const unsigned NumVAddrs = IsGFX11Plus ? (IsA16 ? 4 : 5) : NumVAddrDwords;
const bool UseNSA =
Subtarget->hasNSAEncoding() && NumVAddrs <= Subtarget->getNSAMaxSize();
const unsigned BaseOpcodes[2][2] = {
{AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
{AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
int Opcode;
if (UseNSA) {
Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11NSA
: AMDGPU::MIMGEncGfx10NSA,
NumVDataDwords, NumVAddrDwords);
} else {
Opcode =
AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
IsGFX11Plus ? AMDGPU::MIMGEncGfx11Default
: AMDGPU::MIMGEncGfx10Default,
NumVDataDwords, PowerOf2Ceil(NumVAddrDwords));
}
assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
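// packLanes pushes the first three lanes of a vector operand. 32-bit lanes
// are pushed as individual dwords; f16 lanes are paired into dwords, with
// IsAligned indicating whether the operand starts on a dword boundary or
// must first complete the half-dword left over from the previous operand.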
auto packLanes = [&DAG, &Ops, &DL] (SDValue Op, bool IsAligned) {
SmallVector<SDValue, 3> Lanes;
DAG.ExtractVectorElements(Op, Lanes, 0, 3);
if (Lanes[0].getValueSizeInBits() == 32) {
for (unsigned I = 0; I < 3; ++I)
Ops.push_back(DAG.getBitcast(MVT::i32, Lanes[I]));
} else {
if (IsAligned) {
Ops.push_back(
DAG.getBitcast(MVT::i32,
DAG.getBuildVector(MVT::v2f16, DL,
{ Lanes[0], Lanes[1] })));
Ops.push_back(Lanes[2]);
} else {
SDValue Elt0 = Ops.pop_back_val();
Ops.push_back(
DAG.getBitcast(MVT::i32,
DAG.getBuildVector(MVT::v2f16, DL,
{ Elt0, Lanes[0] })));
Ops.push_back(
DAG.getBitcast(MVT::i32,
DAG.getBuildVector(MVT::v2f16, DL,
{ Lanes[1], Lanes[2] })));
}
}
};
if (UseNSA && IsGFX11Plus) {
Ops.push_back(NodePtr);
Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
Ops.push_back(RayOrigin);
if (IsA16) {
SmallVector<SDValue, 3> DirLanes, InvDirLanes, MergedLanes;
DAG.ExtractVectorElements(RayDir, DirLanes, 0, 3);
DAG.ExtractVectorElements(RayInvDir, InvDirLanes, 0, 3);
for (unsigned I = 0; I < 3; ++I) {
MergedLanes.push_back(DAG.getBitcast(
MVT::i32, DAG.getBuildVector(MVT::v2f16, DL,
{DirLanes[I], InvDirLanes[I]})));
}
Ops.push_back(DAG.getBuildVector(MVT::v3i32, DL, MergedLanes));
} else {
Ops.push_back(RayDir);
Ops.push_back(RayInvDir);
}
} else {
if (Is64)
DAG.ExtractVectorElements(DAG.getBitcast(MVT::v2i32, NodePtr), Ops, 0,
2);
else
Ops.push_back(NodePtr);
Ops.push_back(DAG.getBitcast(MVT::i32, RayExtent));
packLanes(RayOrigin, true);
packLanes(RayDir, true);
packLanes(RayInvDir, false);
}
if (!UseNSA) {
// Build a single vector containing all the operands so far prepared.
if (NumVAddrDwords > 8) {
SDValue Undef = DAG.getUNDEF(MVT::i32);
Ops.append(16 - Ops.size(), Undef);
}
assert(Ops.size() == 8 || Ops.size() == 16);
SDValue MergedOps = DAG.getBuildVector(
Ops.size() == 16 ? MVT::v16i32 : MVT::v8i32, DL, Ops);
Ops.clear();
Ops.push_back(MergedOps);
}
Ops.push_back(TDescr);
if (IsA16)
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
Ops.push_back(M->getChain());
auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
MachineMemOperand *MemRef = M->getMemOperand();
DAG.setNodeMemRefs(NewNode, {MemRef});
return SDValue(NewNode, 0);
}
case Intrinsic::amdgcn_global_atomic_fadd:
if (!Op.getValue(0).use_empty() && !Subtarget->hasGFX90AInsts()) {
DiagnosticInfoUnsupported
NoFpRet(DAG.getMachineFunction().getFunction(),
"return versions of fp atomics not supported",
DL.getDebugLoc(), DS_Error);
DAG.getContext()->diagnose(NoFpRet);
return SDValue();
}
LLVM_FALLTHROUGH;
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
SDValue Ops[] = {
M->getOperand(0), // Chain
M->getOperand(2), // Ptr
M->getOperand(3) // Value
};
unsigned Opcode = 0;
switch (IntrID) {
case Intrinsic::amdgcn_global_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fadd: {
EVT VT = Op.getOperand(3).getValueType();
return DAG.getAtomic(ISD::ATOMIC_LOAD_FADD, DL, VT,
DAG.getVTList(VT, MVT::Other), Ops,
M->getMemOperand());
}
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_flat_atomic_fmin: {
Opcode = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
}
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmax: {
Opcode = AMDGPUISD::ATOMIC_LOAD_FMAX;
break;
}
default:
llvm_unreachable("unhandled atomic opcode");
}
return DAG.getMemIntrinsicNode(Opcode, SDLoc(Op),
M->getVTList(), Ops, M->getMemoryVT(),
M->getMemOperand());
}
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
return lowerImage(Op, ImageDimIntr, DAG, true);
return SDValue();
}
}
// Call DAG.getMemIntrinsicNode for a load, but first widen a dwordx3 type
// to dwordx4 on subtargets (SI) that lack dwordx3 loads and stores.
SDValue SITargetLowering::getMemIntrinsicNode(unsigned Opcode, const SDLoc &DL,
SDVTList VTList,
ArrayRef<SDValue> Ops, EVT MemVT,
MachineMemOperand *MMO,
SelectionDAG &DAG) const {
EVT VT = VTList.VTs[0];
EVT WidenedVT = VT;
EVT WidenedMemVT = MemVT;
if (!Subtarget->hasDwordx3LoadStores() &&
(WidenedVT == MVT::v3i32 || WidenedVT == MVT::v3f32)) {
WidenedVT = EVT::getVectorVT(*DAG.getContext(),
WidenedVT.getVectorElementType(), 4);
WidenedMemVT = EVT::getVectorVT(*DAG.getContext(),
WidenedMemVT.getVectorElementType(), 4);
MMO = DAG.getMachineFunction().getMachineMemOperand(MMO, 0, 16);
}
assert(VTList.NumVTs == 2);
SDVTList WidenedVTList = DAG.getVTList(WidenedVT, VTList.VTs[1]);
auto NewOp = DAG.getMemIntrinsicNode(Opcode, DL, WidenedVTList, Ops,
WidenedMemVT, MMO);
if (WidenedVT != VT) {
auto Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, NewOp,
DAG.getVectorIdxConstant(0, DL));
NewOp = DAG.getMergeValues({ Extract, SDValue(NewOp.getNode(), 1) }, DL);
}
return NewOp;
}
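// Prepare D16 vector data for a buffer or image store. On targets with
// unpacked D16 VMEM each 16-bit element is zero-extended into its own
// dword; otherwise work around the gfx8.1 image-store bug by repacking into
// i32 pairs, or widen odd-sized vectors such as v3f16 to an even width.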
SDValue SITargetLowering::handleD16VData(SDValue VData, SelectionDAG &DAG,
bool ImageStore) const {
EVT StoreVT = VData.getValueType();
// No change for f16 and legal vector D16 types.
if (!StoreVT.isVector())
return VData;
SDLoc DL(VData);
unsigned NumElements = StoreVT.getVectorNumElements();
if (Subtarget->hasUnpackedD16VMem()) {
// We need to unpack the packed data to store.
EVT IntStoreVT = StoreVT.changeTypeToInteger();
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
EVT EquivStoreVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElements);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, EquivStoreVT, IntVData);
return DAG.UnrollVectorOp(ZExt.getNode());
}
// The sq block of gfx8.1 does not estimate register use correctly for d16
// image store instructions. The data operand is computed as if it were not a
// d16 image instruction.
if (ImageStore && Subtarget->hasImageStoreD16Bug()) {
// Bitcast to i16
EVT IntStoreVT = StoreVT.changeTypeToInteger();
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
// Decompose into scalars
SmallVector<SDValue, 4> Elts;
DAG.ExtractVectorElements(IntVData, Elts);
// Group pairs of i16 into v2i16 and bitcast to i32
SmallVector<SDValue, 4> PackedElts;
for (unsigned I = 0; I < Elts.size() / 2; I += 1) {
SDValue Pair =
DAG.getBuildVector(MVT::v2i16, DL, {Elts[I * 2], Elts[I * 2 + 1]});
SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
PackedElts.push_back(IntPair);
}
if ((NumElements % 2) == 1) {
// Handle v3i16
unsigned I = Elts.size() / 2;
SDValue Pair = DAG.getBuildVector(MVT::v2i16, DL,
{Elts[I * 2], DAG.getUNDEF(MVT::i16)});
SDValue IntPair = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Pair);
PackedElts.push_back(IntPair);
}
// Pad using UNDEF
PackedElts.resize(Elts.size(), DAG.getUNDEF(MVT::i32));
// Build final vector
EVT VecVT =
EVT::getVectorVT(*DAG.getContext(), MVT::i32, PackedElts.size());
return DAG.getBuildVector(VecVT, DL, PackedElts);
}
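// Widen v3 types (e.g. v3f16) to the 4-element type by bitcasting to an
// integer of the store size (i48 for v3f16), zero-extending to the widened
// integer (i64) and bitcasting back (v4f16); the extra element is zero pad.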
if (NumElements == 3) {
EVT IntStoreVT =
EVT::getIntegerVT(*DAG.getContext(), StoreVT.getStoreSizeInBits());
SDValue IntVData = DAG.getNode(ISD::BITCAST, DL, IntStoreVT, VData);
EVT WidenedStoreVT = EVT::getVectorVT(
*DAG.getContext(), StoreVT.getVectorElementType(), NumElements + 1);
EVT WidenedIntVT = EVT::getIntegerVT(*DAG.getContext(),
WidenedStoreVT.getStoreSizeInBits());
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenedIntVT, IntVData);
return DAG.getNode(ISD::BITCAST, DL, WidenedStoreVT, ZExt);
}
assert(isTypeLegal(StoreVT));
return VData;
}
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp_compr: {
if (!Subtarget->hasCompressedExport()) {
DiagnosticInfoUnsupported BadIntrin(
DAG.getMachineFunction().getFunction(),
"intrinsic not supported on subtarget", DL.getDebugLoc());
DAG.getContext()->diagnose(BadIntrin);
}
SDValue Src0 = Op.getOperand(4);
SDValue Src1 = Op.getOperand(5);
// Hack around illegal type on SI by directly selecting it.
if (isTypeLegal(Src0.getValueType()))
return SDValue();
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
SDValue Undef = DAG.getUNDEF(MVT::f32);
const SDValue Ops[] = {
Op.getOperand(2), // tgt
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
Undef, // src2
Undef, // src3
Op.getOperand(7), // vm
DAG.getTargetConstant(1, DL, MVT::i1), // compr
Op.getOperand(3), // en
Op.getOperand(0) // Chain
};
unsigned Opc = Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
unsigned WGSize = ST.getFlatWorkGroupSizes(MF.getFunction()).second;
if (WGSize <= ST.getWavefrontSize())
return SDValue(DAG.getMachineNode(AMDGPU::WAVE_BARRIER, DL, MVT::Other,
Op.getOperand(0)), 0);
}
return SDValue();
}
case Intrinsic::amdgcn_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
unsigned Dfmt = cast<ConstantSDNode>(Op.getOperand(8))->getZExtValue();
unsigned Nfmt = cast<ConstantSDNode>(Op.getOperand(9))->getZExtValue();
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(10))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(11))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Op.getOperand(5), // voffset
Op.getOperand(6), // soffset
Op.getOperand(7), // offset
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_struct_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // format
Op.getOperand(8), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Chain,
VData, // vdata
Op.getOperand(3), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
MemSDNode *M = cast<MemSDNode>(Op);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_buffer_store:
case Intrinsic::amdgcn_buffer_store_format: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
unsigned Glc = cast<ConstantSDNode>(Op.getOperand(6))->getZExtValue();
unsigned Slc = cast<ConstantSDNode>(Op.getOperand(7))->getZExtValue();
unsigned IdxEn = getIdxEn(Op.getOperand(4));
SDValue Ops[] = {
Chain,
VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
if (VDataType == MVT::i8 || VDataType == MVT::i16)
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format: {
const bool IsFormat =
IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format;
SDValue VData = Op.getOperand(2);
EVT VDataVT = VData.getValueType();
EVT EltType = VDataVT.getScalarType();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
if (IsD16) {
VData = handleD16VData(VData, DAG);
VDataVT = VData.getValueType();
}
if (!isTypeLegal(VDataVT)) {
VData =
DAG.getNode(ISD::BITCAST, DL,
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
}
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
Chain,
VData,
Op.getOperand(3), // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
Offsets.first, // voffset
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy, swizzled buffer
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferStores(DAG, VDataVT, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format: {
const bool IsFormat =
IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format;
SDValue VData = Op.getOperand(2);
EVT VDataVT = VData.getValueType();
EVT EltType = VDataVT.getScalarType();
bool IsD16 = IsFormat && (EltType.getSizeInBits() == 16);
if (IsD16) {
VData = handleD16VData(VData, DAG);
VDataVT = VData.getValueType();
}
if (!isTypeLegal(VDataVT)) {
VData =
DAG.getNode(ISD::BITCAST, DL,
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
}
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
Chain,
VData,
Op.getOperand(3), // rsrc
Op.getOperand(4), // vindex
Offsets.first, // voffset
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy, swizzled buffer
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
return handleByteShortBufferStores(DAG, VDataType, DL, Ops, M);
return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds: {
unsigned Opc;
bool HasVIndex = IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds;
unsigned OpOffset = HasVIndex ? 1 : 0;
SDValue VOffset = Op.getOperand(5 + OpOffset);
auto CVOffset = dyn_cast<ConstantSDNode>(VOffset);
bool HasVOffset = !CVOffset || !CVOffset->isZero();
unsigned Size = Op->getConstantOperandVal(4);
switch (Size) {
default:
return SDValue();
case 1:
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
: AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
: HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
: AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
break;
case 2:
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
: AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
: HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
: AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
break;
case 4:
Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
: AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
: HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
: AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
break;
}
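// The opcode suffix encodes the addressing mode: BOTHEN uses both vindex and
// voffset, IDXEN only vindex, OFFEN only voffset, and OFFSET neither (just
// the immediate offset).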
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SmallVector<SDValue, 8> Ops;
if (HasVIndex && HasVOffset)
Ops.push_back(DAG.getBuildVector(MVT::v2i32, DL,
{ Op.getOperand(5), // VIndex
VOffset }));
else if (HasVIndex)
Ops.push_back(Op.getOperand(5));
else if (HasVOffset)
Ops.push_back(VOffset);
Ops.push_back(Op.getOperand(2)); // rsrc
Ops.push_back(Op.getOperand(6 + OpOffset)); // soffset
Ops.push_back(Op.getOperand(7 + OpOffset)); // imm offset
unsigned Aux = Op.getConstantOperandVal(8 + OpOffset);
Ops.push_back(
DAG.getTargetConstant(Aux & AMDGPU::CPol::ALL, DL, MVT::i8)); // cpol
Ops.push_back(
DAG.getTargetConstant((Aux >> 3) & 1, DL, MVT::i8)); // swz
Ops.push_back(M0Val.getValue(0)); // Chain
Ops.push_back(M0Val.getValue(1)); // Glue
auto *M = cast<MemSDNode>(Op);
MachineMemOperand *LoadMMO = M->getMemOperand();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
LoadPtrI.Offset = Op->getConstantOperandVal(7 + OpOffset);
MachinePointerInfo StorePtrI = LoadPtrI;
StorePtrI.V = nullptr;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
Size, LoadMMO->getBaseAlign());
MachineMemOperand *StoreMMO =
MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
sizeof(int32_t), LoadMMO->getBaseAlign());
auto Load = DAG.getMachineNode(Opc, DL, M->getVTList(), Ops);
DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
return SDValue(Load, 0);
}
case Intrinsic::amdgcn_global_load_lds: {
unsigned Opc;
unsigned Size = Op->getConstantOperandVal(4);
switch (Size) {
default:
return SDValue();
case 1:
Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
break;
case 2:
Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
break;
case 4:
Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
break;
}
auto *M = cast<MemSDNode>(Op);
SDValue M0Val = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SmallVector<SDValue, 6> Ops;
SDValue Addr = Op.getOperand(2); // Global ptr
SDValue VOffset;
// Try to split SAddr and VOffset. Global and LDS pointers share the same
// immediate offset, so we cannot use a regular SelectGlobalSAddr().
if (Addr->isDivergent() && Addr.getOpcode() == ISD::ADD) {
SDValue LHS = Addr.getOperand(0);
SDValue RHS = Addr.getOperand(1);
if (LHS->isDivergent())
std::swap(LHS, RHS);
if (!LHS->isDivergent() && RHS.getOpcode() == ISD::ZERO_EXTEND &&
RHS.getOperand(0).getValueType() == MVT::i32) {
// add (i64 sgpr), (zero_extend (i32 vgpr))
Addr = LHS;
VOffset = RHS.getOperand(0);
}
}
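// e.g. for Addr = (add (i64 s[0:1]), (zext (i32 v0))), the uniform base
// s[0:1] becomes the SAddr operand below and v0 becomes the VOffset.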
Ops.push_back(Addr);
if (!Addr->isDivergent()) {
Opc = AMDGPU::getGlobalSaddrOp(Opc);
if (!VOffset)
VOffset = SDValue(
DAG.getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32,
DAG.getTargetConstant(0, DL, MVT::i32)), 0);
Ops.push_back(VOffset);
}
Ops.push_back(Op.getOperand(5)); // Offset
Ops.push_back(Op.getOperand(6)); // CPol
Ops.push_back(M0Val.getValue(0)); // Chain
Ops.push_back(M0Val.getValue(1)); // Glue
MachineMemOperand *LoadMMO = M->getMemOperand();
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
LoadPtrI.Offset = Op->getConstantOperandVal(5);
MachinePointerInfo StorePtrI = LoadPtrI;
LoadPtrI.AddrSpace = AMDGPUAS::GLOBAL_ADDRESS;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
auto F = LoadMMO->getFlags() &
~(MachineMemOperand::MOStore | MachineMemOperand::MOLoad);
LoadMMO = MF.getMachineMemOperand(LoadPtrI, F | MachineMemOperand::MOLoad,
Size, LoadMMO->getBaseAlign());
MachineMemOperand *StoreMMO =
MF.getMachineMemOperand(StorePtrI, F | MachineMemOperand::MOStore,
sizeof(int32_t), Align(4));
auto Load = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
DAG.setNodeMemRefs(Load, {LoadMMO, StoreMMO});
return SDValue(Load, 0);
}
case Intrinsic::amdgcn_end_cf:
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain), 0);
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
return lowerImage(Op, ImageDimIntr, DAG, true);
return Op;
}
}
}
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
// offset (the offset that is included in bounds checking and swizzling, to be
// split between the instruction's voffset and immoffset fields) and soffset
// (the offset that is excluded from bounds checking and swizzling, to go in
// the instruction's soffset field). This function takes the first kind of
// offset and figures out how to split it between voffset and immoffset.
std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
SDValue Offset, SelectionDAG &DAG) const {
SDLoc DL(Offset);
const unsigned MaxImm = 4095;
SDValue N0 = Offset;
ConstantSDNode *C1 = nullptr;
if ((C1 = dyn_cast<ConstantSDNode>(N0)))
N0 = SDValue();
else if (DAG.isBaseWithConstantOffset(N0)) {
C1 = cast<ConstantSDNode>(N0.getOperand(1));
N0 = N0.getOperand(0);
}
if (C1) {
unsigned ImmOffset = C1->getZExtValue();
// If the immediate value is too big for the immoffset field, keep only the
// low 12 bits (the value modulo 4096) in the immoffset field, so that the
// value that is copied/added for the voffset field is a multiple of 4096 and
// stands more chance of being CSEd with the copy/add for another similar
// load/store.
// However, do not do that rounding down to a multiple of 4096 if that is a
// negative number, as it appears to be illegal to have a negative offset
// in the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
Overflow += ImmOffset;
ImmOffset = 0;
}
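// For example, a combined offset of 4100 becomes ImmOffset = 4 with
// Overflow = 4096 going to the voffset, while 0x80000004 would round down to
// 0x80000000, which is negative as an i32, so the whole 0x80000004 goes to
// the voffset instead and ImmOffset becomes 0.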
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
if (Overflow) {
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
if (!N0)
N0 = OverflowVal;
else {
SDValue Ops[] = { N0, OverflowVal };
N0 = DAG.getNode(ISD::ADD, DL, MVT::i32, Ops);
}
}
}
if (!N0)
N0 = DAG.getConstant(0, DL, MVT::i32);
if (!C1)
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
return {N0, SDValue(C1, 0)};
}
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
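// For example (hypothetical split; the exact values depend on
// AMDGPU::splitMUBUFOffset and the subtarget), a constant offset too large
// for the 12-bit instoffset field may have its excess moved into soffset,
// while a base-plus-constant offset keeps the base in voffset and splits
// only the constant part.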
void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SelectionDAG &DAG, SDValue *Offsets,
Align Alignment) const {
SDLoc DL(CombinedOffset);
if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
uint32_t Imm = C->getZExtValue();
uint32_t SOffset, ImmOffset;
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget,
Alignment)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
if (DAG.isBaseWithConstantOffset(CombinedOffset)) {
SDValue N0 = CombinedOffset.getOperand(0);
SDValue N1 = CombinedOffset.getOperand(1);
uint32_t SOffset, ImmOffset;
int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
Subtarget, Alignment)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
}
// Handle 8 bit and 16 bit buffer loads
SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
EVT LoadVT, SDLoc DL,
ArrayRef<SDValue> Ops,
MemSDNode *M) const {
EVT IntVT = LoadVT.changeTypeToInteger();
unsigned Opc = (LoadVT.getScalarType() == MVT::i8) ?
AMDGPUISD::BUFFER_LOAD_UBYTE : AMDGPUISD::BUFFER_LOAD_USHORT;
SDVTList ResList = DAG.getVTList(MVT::i32, MVT::Other);
SDValue BufferLoad = DAG.getMemIntrinsicNode(Opc, DL, ResList,
Ops, IntVT,
M->getMemOperand());
SDValue LoadVal = DAG.getNode(ISD::TRUNCATE, DL, IntVT, BufferLoad);
LoadVal = DAG.getNode(ISD::BITCAST, DL, LoadVT, LoadVal);
return DAG.getMergeValues({LoadVal, BufferLoad.getValue(1)}, DL);
}
// Handle 8 bit and 16 bit buffer stores
SDValue SITargetLowering::handleByteShortBufferStores(SelectionDAG &DAG,
EVT VDataType, SDLoc DL,
SDValue Ops[],
MemSDNode *M) const {
if (VDataType == MVT::f16)
Ops[1] = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Ops[1]);
SDValue BufferStoreExt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Ops[1]);
Ops[1] = BufferStoreExt;
unsigned Opc = (VDataType == MVT::i8) ? AMDGPUISD::BUFFER_STORE_BYTE :
AMDGPUISD::BUFFER_STORE_SHORT;
ArrayRef<SDValue> OpsRef = makeArrayRef(&Ops[0], 9);
return DAG.getMemIntrinsicNode(Opc, DL, M->getVTList(), OpsRef, VDataType,
M->getMemOperand());
}
static SDValue getLoadExtOrTrunc(SelectionDAG &DAG,
ISD::LoadExtType ExtType, SDValue Op,
const SDLoc &SL, EVT VT) {
if (VT.bitsLT(Op.getValueType()))
return DAG.getNode(ISD::TRUNCATE, SL, VT, Op);
switch (ExtType) {
case ISD::SEXTLOAD:
return DAG.getNode(ISD::SIGN_EXTEND, SL, VT, Op);
case ISD::ZEXTLOAD:
return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, Op);
case ISD::EXTLOAD:
return DAG.getNode(ISD::ANY_EXTEND, SL, VT, Op);
case ISD::NON_EXTLOAD:
return Op;
}
llvm_unreachable("invalid ext type");
}
SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
if (Ld->getAlign() < Align(4) || Ld->isDivergent())
return SDValue();
// FIXME: Constant loads should all be marked invariant.
unsigned AS = Ld->getAddressSpace();
if (AS != AMDGPUAS::CONSTANT_ADDRESS &&
AS != AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
(AS != AMDGPUAS::GLOBAL_ADDRESS || !Ld->isInvariant()))
return SDValue();
// Don't do this early, since it may interfere with adjacent load merging for
// illegal types. We can avoid losing alignment information for exotic types
// pre-legalize.
EVT MemVT = Ld->getMemoryVT();
if ((MemVT.isSimple() && !DCI.isAfterLegalizeDAG()) ||
MemVT.getSizeInBits() >= 32)
return SDValue();
SDLoc SL(Ld);
assert((!MemVT.isVector() || Ld->getExtensionType() == ISD::NON_EXTLOAD) &&
"unexpected vector extload");
// TODO: Drop only high part of range.
SDValue Ptr = Ld->getBasePtr();
SDValue NewLoad = DAG.getLoad(
ISD::UNINDEXED, ISD::NON_EXTLOAD, MVT::i32, SL, Ld->getChain(), Ptr,
Ld->getOffset(), Ld->getPointerInfo(), MVT::i32, Ld->getAlign(),
Ld->getMemOperand()->getFlags(), Ld->getAAInfo(),
nullptr); // Drop ranges
EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
if (MemVT.isFloatingPoint()) {
assert(Ld->getExtensionType() == ISD::NON_EXTLOAD &&
"unexpected fp extload");
TruncVT = MemVT.changeTypeToInteger();
}
SDValue Cvt = NewLoad;
if (Ld->getExtensionType() == ISD::SEXTLOAD) {
Cvt = DAG.getNode(ISD::SIGN_EXTEND_INREG, SL, MVT::i32, NewLoad,
DAG.getValueType(TruncVT));
} else if (Ld->getExtensionType() == ISD::ZEXTLOAD ||
Ld->getExtensionType() == ISD::NON_EXTLOAD) {
Cvt = DAG.getZeroExtendInReg(NewLoad, SL, TruncVT);
} else {
assert(Ld->getExtensionType() == ISD::EXTLOAD);
}
EVT VT = Ld->getValueType(0);
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
DCI.AddToWorklist(Cvt.getNode());
// We may need to handle exotic cases, such as i16->i64 extloads, so insert
// the appropriate extension from the 32-bit load.
Cvt = getLoadExtOrTrunc(DAG, Ld->getExtensionType(), Cvt, SL, IntVT);
DCI.AddToWorklist(Cvt.getNode());
// Handle conversion back to floating point if necessary.
Cvt = DAG.getNode(ISD::BITCAST, SL, VT, Cvt);
return DAG.getMergeValues({ Cvt, NewLoad.getValue(1) }, SL);
}
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
ISD::LoadExtType ExtType = Load->getExtensionType();
EVT MemVT = Load->getMemoryVT();
if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
if (MemVT == MVT::i16 && isTypeLegal(MVT::i16))
return SDValue();
// FIXME: Copied from PPC
// First, load into 32 bits, then truncate to 1 bit.
SDValue Chain = Load->getChain();
SDValue BasePtr = Load->getBasePtr();
MachineMemOperand *MMO = Load->getMemOperand();
EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 : MVT::i16;
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, RealMemVT, MMO);
if (!MemVT.isVector()) {
SDValue Ops[] = {
DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
NewLD.getValue(1)
};
return DAG.getMergeValues(Ops, DL);
}
SmallVector<SDValue, 3> Elts;
for (unsigned I = 0, N = MemVT.getVectorNumElements(); I != N; ++I) {
SDValue Elt = DAG.getNode(ISD::SRL, DL, MVT::i32, NewLD,
DAG.getConstant(I, DL, MVT::i32));
Elts.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Elt));
}
SDValue Ops[] = {
DAG.getBuildVector(MemVT, DL, Elts),
NewLD.getValue(1)
};
return DAG.getMergeValues(Ops, DL);
}
if (!MemVT.isVector())
return SDValue();
assert(Op.getValueType().getVectorElementType() == MVT::i32 &&
"Custom lowering for non-i32 vectors hasn't been implemented.");
Align Alignment = Load->getAlign();
unsigned AS = Load->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
Alignment.value() < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
return SplitVectorLoad(Op, DAG);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
if (!Op->isDivergent() && Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
}
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && !Op->isDivergent() &&
Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load) &&
Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType())
return SDValue();
return WidenOrSplitVectorLoad(Op, DAG);
}
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
// loads.
}
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return WidenOrSplitVectorLoad(Op, DAG);
// v3 and v4 loads are supported for private and global memory.
return SDValue();
}
if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
// Depending on the setting of the private_element_size field in the
// resource descriptor, we can only make private accesses up to a certain
// size.
switch (Subtarget->getMaxPrivateElementSize()) {
case 4: {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
return DAG.getMergeValues(Ops, DL);
}
case 8:
if (NumElements > 2)
return SplitVectorLoad(Op, DAG);
return SDValue();
case 16:
// Same as global/flat
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
// v3 loads not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return WidenOrSplitVectorLoad(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;
auto Flags = Load->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(MemVT.getSizeInBits(), AS,
Load->getAlign(), Flags, &Fast) &&
Fast)
return SDValue();
if (MemVT.isVector())
return SplitVectorLoad(Op, DAG);
}
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
MemVT, *Load->getMemOperand())) {
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
return DAG.getMergeValues(Ops, DL);
}
return SDValue();
}
SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT.getSizeInBits() == 128 || VT.getSizeInBits() == 256)
return splitTernaryVectorOp(Op, DAG);
assert(VT.getSizeInBits() == 64);
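// Decompose the 64-bit select into two 32-bit selects over the low and high
// halves, bitcasting the operands through v2i32 to extract and rebuild them.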
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
SDValue Res = DAG.getBuildVector(MVT::v2i32, DL, {Lo, Hi});
return DAG.getNode(ISD::BITCAST, DL, VT, Res);
}
// Catch division cases where we can use shortcuts with rcp and rsq
// instructions.
SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
// Without !fpmath accuracy information, we can't do more because we don't
// know exactly whether rcp is accurate enough to meet the !fpmath requirement.
if (!AllowInaccurateRcp)
return SDValue();
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
if (CLHS->isExactlyValue(1.0)) {
// v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
// the CI documentation they have a worst-case error of 1 ulp.
// OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
// use it as long as we aren't trying to use denormals.
//
// v_rcp_f16 and v_rsq_f16 DO support denormals.
// 1.0 / sqrt(x) -> rsq(x)
// XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
// error seems really high at 2^29 ULP.
if (RHS.getOpcode() == ISD::FSQRT)
return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
// 1.0 / x -> rcp(x)
return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
}
// Same as for 1.0, but expand the sign out of the constant.
if (CLHS->isExactlyValue(-1.0)) {
// -1.0 / x -> rcp (fneg x)
SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS);
}
}
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, Flags);
}
SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op,
SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
bool AllowInaccurateDiv = Flags.hasApproximateFuncs() ||
DAG.getTarget().Options.UnsafeFPMath;
if (!AllowInaccurateDiv)
return SDValue();
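// Refine r = rcp(y) with two Newton-Raphson iterations, r' = r + r*(1 - y*r),
// then form ret = x*r and apply one residual correction:
// ret' = ret + r*(x - y*ret).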
SDValue NegY = DAG.getNode(ISD::FNEG, SL, VT, Y);
SDValue One = DAG.getConstantFP(1.0, SL, VT);
SDValue R = DAG.getNode(AMDGPUISD::RCP, SL, VT, Y);
SDValue Tmp0 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
R = DAG.getNode(ISD::FMA, SL, VT, Tmp0, R, R);
SDValue Tmp1 = DAG.getNode(ISD::FMA, SL, VT, NegY, R, One);
R = DAG.getNode(ISD::FMA, SL, VT, Tmp1, R, R);
SDValue Ret = DAG.getNode(ISD::FMUL, SL, VT, X, R);
SDValue Tmp2 = DAG.getNode(ISD::FMA, SL, VT, NegY, Ret, X);
return DAG.getNode(ISD::FMA, SL, VT, Tmp2, R, Ret);
}
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue GlueChain,
SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
return DAG.getNode(Opcode, SL, VT, A, B, Flags);
}
assert(GlueChain->getNumValues() == 3);
SDVTList VTList = DAG.getVTList(VT, MVT::Other, MVT::Glue);
switch (Opcode) {
default: llvm_unreachable("no chain equivalent for opcode");
case ISD::FMUL:
Opcode = AMDGPUISD::FMUL_W_CHAIN;
break;
}
return DAG.getNode(Opcode, SL, VTList,
{GlueChain.getValue(1), A, B, GlueChain.getValue(2)},
Flags);
}
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
EVT VT, SDValue A, SDValue B, SDValue C,
SDValue GlueChain, SDNodeFlags Flags) {
if (GlueChain->getNumValues() <= 1) {
return DAG.getNode(Opcode, SL, VT, {A, B, C}, Flags);
}
assert(GlueChain->getNumValues() == 3);
SDVTList VTList = DAG.getVTList(VT, MVT::Other, MVT::Glue);
switch (Opcode) {
default: llvm_unreachable("no chain equivalent for opcode");
case ISD::FMA:
Opcode = AMDGPUISD::FMA_W_CHAIN;
break;
}
return DAG.getNode(Opcode, SL, VTList,
{GlueChain.getValue(1), A, B, C, GlueChain.getValue(2)},
Flags);
}
SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
SDLoc SL(Op);
SDValue Src0 = Op.getOperand(0);
SDValue Src1 = Op.getOperand(1);
SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0);
}
// Faster 2.5 ULP division that does not support denormals.
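// If |rhs| exceeds 2^96 (K0 = 0x6f800000), rcp would flush to zero, so
// pre-scale rhs by 2^-32 (K1 = 0x2f800000) before the rcp and multiply the
// final product by the same factor: r3 * (lhs * rcp(rhs * r3)) == lhs / rhs.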
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
const APFloat K0Val(BitsToFloat(0x6f800000));
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
const APFloat K1Val(BitsToFloat(0x2f800000));
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
EVT SetCCVT =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f32);
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
// TODO: Should this propagate fast-math-flags?
r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
// rcp does not support denormals.
SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
}
// Returns immediate value for setting the F32 denorm mode when using the
// S_DENORM_MODE instruction.
static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
const SDLoc &SL, const GCNSubtarget *ST) {
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
? FP_DENORM_FLUSH_NONE
: FP_DENORM_FLUSH_IN_FLUSH_OUT;
int Mode = SPDenormMode | (DPDenormModeDefault << 2);
return DAG.getTargetConstant(Mode, SL, MVT::i32);
}
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
return FastLowered;
// The selection matcher assumes that anything with a chain selects to a
// mayRaiseFPException machine instruction.
// here, we need to explicitly report nofpexcept for the regular fdiv
// lowering.
SDNodeFlags Flags = Op->getFlags();
Flags.setNoFPExcept(true);
SDLoc SL(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
{RHS, RHS, LHS}, Flags);
SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
{LHS, RHS, LHS}, Flags);
// Denominator is scaled to not be denormal, so using rcp is ok.
SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
DenominatorScaled, Flags);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
DenominatorScaled, Flags);
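// Encode hwreg(MODE, 4, 2): ID_MODE selects the MODE register, and the
// offset of 4 with width 2 (WIDTH_M1 = 1) selects the two FP32 denorm bits
// within it.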
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
if (!HasFP32Denormals) {
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
// lowering. The chain dependence is insufficient, and we need glue. We do
// not need the glue variants in a strictfp function.
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
DAG.getEntryNode(), EnableDenormValue).getNode();
} else {
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
SL, MVT::i32);
EnableDenorm =
DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
{EnableDenormValue, BitField, DAG.getEntryNode()});
}
SDValue Ops[3] = {
NegDivScale0,
SDValue(EnableDenorm, 0),
SDValue(EnableDenorm, 1)
};
NegDivScale0 = DAG.getMergeValues(Ops, SL);
}
SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
ApproxRcp, One, NegDivScale0, Flags);
SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
ApproxRcp, Fma0, Flags);
SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
Fma1, Fma1, Flags);
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
NumeratorScaled, Mul, Flags);
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
Fma2, Fma1, Mul, Fma2, Flags);
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
NumeratorScaled, Fma3, Flags);
if (!HasFP32Denormals) {
SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue DisableDenormValue =
getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
Fma4.getValue(2)).getNode();
} else {
const SDValue DisableDenormValue =
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
DisableDenorm = DAG.getMachineNode(
AMDGPU::S_SETREG_B32, SL, MVT::Other,
{DisableDenormValue, BitField, Fma4.getValue(1), Fma4.getValue(2)});
}
SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
SDValue(DisableDenorm, 0), DAG.getRoot());
DAG.setRoot(OutputChain);
}
SDValue Scale = NumeratorScaled.getValue(1);
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
{Fma4, Fma1, Fma3, Scale}, Flags);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS, Flags);
}
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
if (SDValue FastLowered = lowerFastUnsafeFDIV64(Op, DAG))
return FastLowered;
SDLoc SL(Op);
SDValue X = Op.getOperand(0);
SDValue Y = Op.getOperand(1);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
SDVTList ScaleVT = DAG.getVTList(MVT::f64, MVT::i1);
SDValue DivScale0 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, Y, Y, X);
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f64, DivScale0);
SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f64, DivScale0);
SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Rcp, One);
SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f64, Rcp, Fma0, Rcp);
SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f64, NegDivScale0, Fma1, One);
SDValue DivScale1 = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, X, Y, X);
SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f64, Fma1, Fma2, Fma1);
SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, DivScale1, Fma3);
SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f64,
NegDivScale0, Mul, DivScale1);
SDValue Scale;
if (!Subtarget->hasUsableDivScaleConditionOutput()) {
// Workaround a hardware bug on SI where the condition output from div_scale
// is not usable.
const SDValue Hi = DAG.getConstant(1, SL, MVT::i32);
// Figure out which scale to use for div_fmas.
SDValue NumBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, X);
SDValue DenBC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Y);
SDValue Scale0BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale0);
SDValue Scale1BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, DivScale1);
SDValue NumHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, NumBC, Hi);
SDValue DenHi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, DenBC, Hi);
SDValue Scale0Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale0BC, Hi);
SDValue Scale1Hi
= DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Scale1BC, Hi);
SDValue CmpDen = DAG.getSetCC(SL, MVT::i1, DenHi, Scale0Hi, ISD::SETEQ);
SDValue CmpNum = DAG.getSetCC(SL, MVT::i1, NumHi, Scale1Hi, ISD::SETEQ);
Scale = DAG.getNode(ISD::XOR, SL, MVT::i1, CmpNum, CmpDen);
} else {
Scale = DivScale1.getValue(1);
}
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f64,
Fma4, Fma3, Mul, Scale);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f64, Fmas, Y, X);
}
SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
if (VT == MVT::f32)
return LowerFDIV32(Op, DAG);
if (VT == MVT::f64)
return LowerFDIV64(Op, DAG);
if (VT == MVT::f16)
return LowerFDIV16(Op, DAG);
llvm_unreachable("Unexpected type for fdiv");
}
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT VT = Store->getMemoryVT();
if (VT == MVT::i1) {
return DAG.getTruncStore(Store->getChain(), DL,
DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
Store->getBasePtr(), MVT::i1, Store->getMemOperand());
}
assert(VT.isVector() &&
Store->getValue().getValueType().getScalarType() == MVT::i32);
unsigned AS = Store->getAddressSpace();
if (Subtarget->hasLDSMisalignedBug() &&
AS == AMDGPUAS::FLAT_ADDRESS &&
Store->getAlign().value() < VT.getStoreSize() && VT.getSizeInBits() > 32) {
return SplitVectorStore(Op, DAG);
}
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// If there is a possibility that flat instructions access scratch memory,
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
AS = MFI->hasFlatScratchInit() ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
if (AS == AMDGPUAS::GLOBAL_ADDRESS ||
AS == AMDGPUAS::FLAT_ADDRESS) {
if (NumElements > 4)
return SplitVectorStore(Op, DAG);
// v3 stores not supported on SI.
if (NumElements == 3 && !Subtarget->hasDwordx3LoadStores())
return SplitVectorStore(Op, DAG);
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
VT, *Store->getMemOperand()))
return expandUnalignedStore(Store, DAG);
return SDValue();
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
switch (Subtarget->getMaxPrivateElementSize()) {
case 4:
return scalarizeVectorStore(Store, DAG);
case 8:
if (NumElements > 2)
return SplitVectorStore(Op, DAG);
return SDValue();
case 16:
if (NumElements > 4 ||
(NumElements == 3 && !Subtarget->enableFlatScratch()))
return SplitVectorStore(Op, DAG);
return SDValue();
default:
llvm_unreachable("unsupported private_element_size");
}
} else if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
bool Fast = false;
auto Flags = Store->getMemOperand()->getFlags();
if (allowsMisalignedMemoryAccessesImpl(VT.getSizeInBits(), AS,
Store->getAlign(), Flags, &Fast) &&
Fast)
return SDValue();
if (VT.isVector())
return SplitVectorStore(Op, DAG);
return expandUnalignedStore(Store, DAG);
}
// Probably an invalid store. If so we'll end up emitting a selection error.
return SDValue();
}
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Arg = Op.getOperand(0);
SDValue TrigVal;
// Propagate fast-math flags so that the multiply we introduce can be folded
// if Arg is already the result of a multiply by constant.
auto Flags = Op->getFlags();
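// The hardware SIN/COS units take their input in units of 2*pi (turns
// rather than radians), hence the scale by 1/(2*pi) below. Subtargets with a
// reduced input range additionally use FRACT to bring the scaled argument
// into [0, 1).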
SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
if (Subtarget->hasTrigReducedRange()) {
SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal, Flags);
} else {
TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
}
switch (Op.getOpcode()) {
case ISD::FCOS:
return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal, Flags);
case ISD::FSIN:
return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal, Flags);
default:
llvm_unreachable("Wrong trig opcode");
}
}
SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
AtomicSDNode *AtomicNode = cast<AtomicSDNode>(Op);
assert(AtomicNode->isCompareAndSwap());
unsigned AS = AtomicNode->getAddressSpace();
// No custom lowering required for local address space
if (!AMDGPU::isFlatGlobalAddrSpace(AS))
return Op;
// Non-local address spaces require custom lowering for atomic compare and
// swap; the new and old values are packed together into a v2i32, or a v2i64
// for the _X2 variants.
SDLoc DL(Op);
SDValue ChainIn = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
SDValue Old = Op.getOperand(2);
SDValue New = Op.getOperand(3);
EVT VT = Op.getValueType();
MVT SimpleVT = VT.getSimpleVT();
MVT VecType = MVT::getVectorVT(SimpleVT, 2);
SDValue NewOld = DAG.getBuildVector(VecType, DL, {New, Old});
SDValue Ops[] = { ChainIn, Addr, NewOld };
return DAG.getMemIntrinsicNode(AMDGPUISD::ATOMIC_CMP_SWAP, DL, Op->getVTList(),
Ops, VT, AtomicNode->getMemOperand());
}
//===----------------------------------------------------------------------===//
// Custom DAG optimizations
//===----------------------------------------------------------------------===//
SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
EVT ScalarVT = VT.getScalarType();
if (ScalarVT != MVT::f32 && ScalarVT != MVT::f16)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
// TODO: We could try to match extracting the higher bytes, which would be
// easier if i8 vectors weren't promoted to i32 vectors, particularly after
// types are legalized. v4i8 -> v4f32 is probably the only case to worry
// about in practice.
if (DCI.isAfterLegalizeDAG() && SrcVT == MVT::i32) {
if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, MVT::f32, Src);
DCI.AddToWorklist(Cvt.getNode());
// For the f16 case, fold to a cast to f32 and then cast back to f16.
if (ScalarVT != MVT::f32) {
Cvt = DAG.getNode(ISD::FP_ROUND, DL, VT, Cvt,
DAG.getTargetConstant(0, DL, MVT::i32));
}
return Cvt;
}
}
return SDValue();
}
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
// This is a variant of
// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
//
// The normal DAG combiner will do this, but only if the add has one use,
// since otherwise duplicating the add would increase the number of
// instructions.
//
// This prevents us from seeing a constant offset that can be folded into a
// memory instruction's addressing mode. If we know the resulting add offset of
// a pointer can be folded into an addressing offset, we can replace the pointer
// operand with the add of new constant offset. This eliminates one of the uses,
// and may allow the remaining use to also be simplified.
//
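// For example, when the add has multiple uses:
//   (shl (add x, 16), 2) -> (add (shl x, 2), 64)
// so the +64 can later be folded into a load/store addressing mode.
//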
SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
unsigned AddrSpace,
EVT MemVT,
DAGCombinerInfo &DCI) const {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// We only do this to handle cases where it's profitable when there are
// multiple uses of the add, so defer to the standard combine.
if ((N0.getOpcode() != ISD::ADD && N0.getOpcode() != ISD::OR) ||
N0->hasOneUse())
return SDValue();
const ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N1);
if (!CN1)
return SDValue();
const ConstantSDNode *CAdd = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (!CAdd)
return SDValue();
// If the resulting offset is too large, we can't fold it into the addressing
// mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
Type *Ty = MemVT.getTypeForEVT(*DCI.DAG.getContext());
AddrMode AM;
AM.HasBaseReg = true;
AM.BaseOffs = Offset.getSExtValue();
if (!isLegalAddressingMode(DCI.DAG.getDataLayout(), AM, Ty, AddrSpace))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
EVT VT = N->getValueType(0);
SDValue ShlX = DAG.getNode(ISD::SHL, SL, VT, N0.getOperand(0), N1);
SDValue COffset = DAG.getConstant(Offset, SL, VT);
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() &&
(N0.getOpcode() == ISD::OR ||
N0->getFlags().hasNoUnsignedWrap()));
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset, Flags);
}
/// MemSDNode::getBasePtr() does not work for intrinsics, whose base pointer
/// operand needs to be offset past the chain and intrinsic ID. Theoretically
/// we would also need to check the specific intrinsic, but they all place the
/// pointer operand first.
static unsigned getBasePtrIndex(const MemSDNode *N) {
switch (N->getOpcode()) {
case ISD::STORE:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
return 2;
default:
return 1;
}
}
SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
unsigned PtrIdx = getBasePtrIndex(N);
SDValue Ptr = N->getOperand(PtrIdx);
// TODO: We could also do this for multiplies.
if (Ptr.getOpcode() == ISD::SHL) {
SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), N->getAddressSpace(),
N->getMemoryVT(), DCI);
if (NewPtr) {
SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
NewOps[PtrIdx] = NewPtr;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
}
return SDValue();
}
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) {
return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
(Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) ||
(Opc == ISD::XOR && Val == 0);
}
// Break up a 64-bit bitwise operation on a constant into two 32-bit and/or/xor
// operations. This will typically happen anyway for a VALU 64-bit and. This
// exposes other 32-bit integer combine opportunities since most 64-bit
// operations are decomposed this way. TODO: We won't want this for SALU
// especially if it is an inline immediate.
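// For example, (and x:i64, 0x00000000ffffffff) splits into an AND of the low
// half with -1 (which folds to the low half itself) and an AND of the high
// half with 0 (which folds to 0), eliminating the 64-bit operation.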
SDValue SITargetLowering::splitBinaryBitConstantOp(
DAGCombinerInfo &DCI,
const SDLoc &SL,
unsigned Opc, SDValue LHS,
const ConstantSDNode *CRHS) const {
uint64_t Val = CRHS->getZExtValue();
uint32_t ValLo = Lo_32(Val);
uint32_t ValHi = Hi_32(Val);
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
bitOpWithConstantIsReducible(Opc, ValHi)) ||
(CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
// If we need to materialize a 64-bit immediate, it will be split up later
// anyway. Avoid creating the harder to understand 64-bit immediate
// materialization.
return splitBinaryBitConstantOpImpl(DCI, SL, Opc, LHS, ValLo, ValHi);
}
return SDValue();
}
// Returns true if the argument is a boolean value which is not serialized
// into memory or an argument, and so does not require a v_cndmask_b32 to be
// deserialized.
static bool isBoolSGPR(SDValue V) {
if (V.getValueType() != MVT::i1)
return false;
switch (V.getOpcode()) {
default:
break;
case ISD::SETCC:
case AMDGPUISD::FP_CLASS:
return true;
case ISD::AND:
case ISD::OR:
case ISD::XOR:
return isBoolSGPR(V.getOperand(0)) && isBoolSGPR(V.getOperand(1));
}
return false;
}
// If a constant has all zeroes or all ones within each byte return it.
// Otherwise return 0.
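// e.g. 0x00ff00ff is returned unchanged, while 0x00ff1234 returns 0 because
// its low two bytes are only partially set.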
static uint32_t getConstantPermuteMask(uint32_t C) {
// 0xff for any zero byte in the mask
uint32_t ZeroByteMask = 0;
if (!(C & 0x000000ff)) ZeroByteMask |= 0x000000ff;
if (!(C & 0x0000ff00)) ZeroByteMask |= 0x0000ff00;
if (!(C & 0x00ff0000)) ZeroByteMask |= 0x00ff0000;
if (!(C & 0xff000000)) ZeroByteMask |= 0xff000000;
uint32_t NonZeroByteMask = ~ZeroByteMask; // 0xff for any non-zero byte
if ((NonZeroByteMask & C) != NonZeroByteMask)
return 0; // Partial bytes selected.
return C;
}
// Check if a node selects whole bytes from its operand 0 starting at a byte
// boundary while masking the rest. Returns the select mask as used by
// v_perm_b32, or ~0 if it did not succeed.
// Note byte select encoding:
// value 0-3 selects corresponding source byte;
// value 0xc selects zero;
// value 0xff selects 0xff.
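// e.g. (and x, 0x0000ffff) yields the mask 0x0c0c0100, and (srl x, 16)
// yields 0x0c0c0302.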
static uint32_t getPermuteMask(SelectionDAG &DAG, SDValue V) {
assert(V.getValueSizeInBits() == 32);
if (V.getNumOperands() != 2)
return ~0;
ConstantSDNode *N1 = dyn_cast<ConstantSDNode>(V.getOperand(1));
if (!N1)
return ~0;
uint32_t C = N1->getZExtValue();
switch (V.getOpcode()) {
default:
break;
case ISD::AND:
if (uint32_t ConstMask = getConstantPermuteMask(C)) {
return (0x03020100 & ConstMask) | (0x0c0c0c0c & ~ConstMask);
}
break;
case ISD::OR:
if (uint32_t ConstMask = getConstantPermuteMask(C)) {
return (0x03020100 & ~ConstMask) | ConstMask;
}
break;
case ISD::SHL:
if (C % 8)
return ~0;
return uint32_t((0x030201000c0c0c0cull << C) >> 32);
case ISD::SRL:
if (C % 8)
return ~0;
return uint32_t(0x0c0c0c0c03020100ull >> C);
}
return ~0;
}
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
if (VT == MVT::i64 && CRHS) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS))
return Split;
}
if (CRHS && VT == MVT::i32) {
// and (srl x, c), mask => shl (bfe x, nb + c, mask >> nb), nb
// nb = number of trailing zeroes in mask
// It can be optimized out using SDWA for GFX8+ in the SDWA peephole pass,
// given that we are selecting 8 or 16 bit fields starting at byte boundary.
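// e.g. (and (srl x, 8), 0xff00) -> (shl (bfe x, 16, 8), 8).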
uint64_t Mask = CRHS->getZExtValue();
unsigned Bits = countPopulation(Mask);
if (getSubtarget()->hasSDWA() && LHS->getOpcode() == ISD::SRL &&
(Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) {
if (auto *CShift = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
unsigned Shift = CShift->getZExtValue();
unsigned NB = CRHS->getAPIntValue().countTrailingZeros();
unsigned Offset = NB + Shift;
if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary.
SDLoc SL(N);
SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
LHS->getOperand(0),
DAG.getConstant(Offset, SL, MVT::i32),
DAG.getConstant(Bits, SL, MVT::i32));
EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), Bits);
SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE,
DAG.getValueType(NarrowVT));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext,
DAG.getConstant(NB, SDLoc(CRHS), MVT::i32));
return Shl;
}
}
}
// and (perm x, y, c1), c2 -> perm x, y, permute_mask(c1, c2)
if (LHS.hasOneUse() && LHS.getOpcode() == AMDGPUISD::PERM &&
isa<ConstantSDNode>(LHS.getOperand(2))) {
uint32_t Sel = getConstantPermuteMask(Mask);
if (!Sel)
return SDValue();
// Select 0xc for all zero bytes
Sel = (LHS.getConstantOperandVal(2) & Sel) | (~Sel & 0x0c0c0c0c);
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(Sel, DL, MVT::i32));
}
}
// (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
// fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == ISD::SETCC) {
ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
SDValue X = LHS.getOperand(0);
SDValue Y = RHS.getOperand(0);
if (Y.getOpcode() != ISD::FABS || Y.getOperand(0) != X)
return SDValue();
if (LCC == ISD::SETO) {
if (X != LHS.getOperand(1))
return SDValue();
if (RCC == ISD::SETUNE) {
const ConstantFPSDNode *C1 = dyn_cast<ConstantFPSDNode>(RHS.getOperand(1));
if (!C1 || !C1->isInfinity() || C1->isNegative())
return SDValue();
const uint32_t Mask = SIInstrFlags::N_NORMAL |
SIInstrFlags::N_SUBNORMAL |
SIInstrFlags::N_ZERO |
SIInstrFlags::P_ZERO |
SIInstrFlags::P_SUBNORMAL |
SIInstrFlags::P_NORMAL;
static_assert(((~(SIInstrFlags::S_NAN |
SIInstrFlags::Q_NAN |
SIInstrFlags::N_INFINITY |
SIInstrFlags::P_INFINITY)) & 0x3ff) == Mask,
"mask not equal");
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
X, DAG.getConstant(Mask, DL, MVT::i32));
}
}
}
if (RHS.getOpcode() == ISD::SETCC && LHS.getOpcode() == AMDGPUISD::FP_CLASS)
std::swap(LHS, RHS);
if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == AMDGPUISD::FP_CLASS &&
RHS.hasOneUse()) {
ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
// and (fcmp seto), (fp_class x, mask) -> fp_class x, mask & ~(p_nan | n_nan)
// and (fcmp setuo), (fp_class x, mask) -> fp_class x, mask & (p_nan | n_nan)
const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
if ((LCC == ISD::SETO || LCC == ISD::SETUO) && Mask &&
(RHS.getOperand(0) == LHS.getOperand(0) &&
LHS.getOperand(0) == LHS.getOperand(1))) {
const unsigned OrdMask = SIInstrFlags::S_NAN | SIInstrFlags::Q_NAN;
unsigned NewMask = LCC == ISD::SETO ?
Mask->getZExtValue() & ~OrdMask :
Mask->getZExtValue() & OrdMask;
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, RHS.getOperand(0),
DAG.getConstant(NewMask, DL, MVT::i32));
}
}
if (VT == MVT::i32 &&
(RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
// and x, (sext cc from i1) => select cc, x, 0
if (RHS.getOpcode() != ISD::SIGN_EXTEND)
std::swap(LHS, RHS);
if (isBoolSGPR(RHS.getOperand(0)))
return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
}
// and (op x, c1), (op y, c2) -> perm x, y, permute_mask(c1, c2)
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
uint32_t LHSMask = getPermuteMask(DAG, LHS);
uint32_t RHSMask = getPermuteMask(DAG, RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
// Canonicalize the expression in an attempt to have fewer unique masks
// and therefore fewer registers used to hold the masks.
if (LHSMask > RHSMask) {
std::swap(LHSMask, RHSMask);
std::swap(LHS, RHS);
}
// Mark with 0xc each byte that reads an actual lane from its source operand.
// Bytes producing zero already hold 0xc, bytes producing 0xff hold 0xff, and
// real lane selectors are in the 0-3 range.
uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
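// For example, LHSMask == 0x0c0c0100 yields LHSUsedLanes == 0x00000c0c:
// only the two low bytes read from the source.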
// Check if we need to combine values from the two sources within a byte.
if (!(LHSUsedLanes & RHSUsedLanes) &&
// If we select the high and low words, keep it for SDWA.
// TODO: teach SDWA to work with v_perm_b32 and remove the check.
!(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
// Each byte of each mask is either a lane selector (0-3) or has higher bits
// set: 0xff for a constant 0xff byte, 0x0c for a constant zero byte. If
// either mask has 0x0c in a byte, the result byte must be 0x0c. Otherwise
// the mask byte that is not 0xff wins. ANDing both masks therefore gives the
// correct result, except that bytes mixing 0x0c with a lane selector must be
// corrected back to exactly 0x0c.
uint32_t Mask = LHSMask & RHSMask;
for (unsigned I = 0; I < 32; I += 8) {
uint32_t ByteSel = 0xff << I;
if ((LHSMask & ByteSel) == 0x0c || (RHSMask & ByteSel) == 0x0c)
Mask &= (0x0c << I) & 0xffffffff;
}
// Add 4 to each active LHS lane. It will not affect any existing 0xff
// or 0x0c.
uint32_t Sel = Mask | (LHSUsedLanes & 0x04040404);
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32,
LHS.getOperand(0), RHS.getOperand(0),
DAG.getConstant(Sel, DL, MVT::i32));
}
}
}
return SDValue();
}
SDValue SITargetLowering::performOrCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
if (VT == MVT::i1) {
// or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
SDValue Src = LHS.getOperand(0);
if (Src != RHS.getOperand(0))
return SDValue();
const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
if (!CLHS || !CRHS)
return SDValue();
// Only 10 bits are used.
static const uint32_t MaxMask = 0x3ff;
uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
Src, DAG.getConstant(NewMask, DL, MVT::i32));
}
return SDValue();
}
// or (perm x, y, c1), c2 -> perm x, y, permute_mask(c1, c2)
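// For example, (or (perm x, y, 0x0c0c0100), 0xffff0000) becomes
// (perm x, y, 0xffff0100).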
if (isa<ConstantSDNode>(RHS) && LHS.hasOneUse() &&
LHS.getOpcode() == AMDGPUISD::PERM &&
isa<ConstantSDNode>(LHS.getOperand(2))) {
uint32_t Sel = getConstantPermuteMask(N->getConstantOperandVal(1));
if (!Sel)
return SDValue();
Sel |= LHS.getConstantOperandVal(2);
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, LHS.getOperand(0),
LHS.getOperand(1), DAG.getConstant(Sel, DL, MVT::i32));
}
// or (op x, c1), (op y, c2) -> perm x, y, permute_mask(c1, c2)
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
uint32_t LHSMask = getPermuteMask(DAG, LHS);
uint32_t RHSMask = getPermuteMask(DAG, RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
// Canonicalize the expression in an attempt to have fewer unique masks
// and therefore fewer registers used to hold the masks.
if (LHSMask > RHSMask) {
std::swap(LHSMask, RHSMask);
std::swap(LHS, RHS);
}
// Mark with 0xc each byte that reads an actual lane from its source operand.
// Bytes producing zero already hold 0xc, bytes producing 0xff hold 0xff, and
// real lane selectors are in the 0-3 range.
uint32_t LHSUsedLanes = ~(LHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
uint32_t RHSUsedLanes = ~(RHSMask & 0x0c0c0c0c) & 0x0c0c0c0c;
// Check if we need to combine values from the two sources within a byte.
if (!(LHSUsedLanes & RHSUsedLanes) &&
// If we select the high and low words, keep it for SDWA.
// TODO: teach SDWA to work with v_perm_b32 and remove the check.
!(LHSUsedLanes == 0x0c0c0000 && RHSUsedLanes == 0x00000c0c)) {
// Kill zero bytes selected by the other mask. The zero selector value is 0xc.
LHSMask &= ~RHSUsedLanes;
RHSMask &= ~LHSUsedLanes;
// Add 4 to each active LHS lane
LHSMask |= LHSUsedLanes & 0x04040404;
// Combine masks
uint32_t Sel = LHSMask | RHSMask;
SDLoc DL(N);
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32,
LHS.getOperand(0), RHS.getOperand(0),
DAG.getConstant(Sel, DL, MVT::i32));
}
}
}
if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
return SDValue();
// TODO: This could be a generic combine with a predicate for extracting the
// high half of an integer being free.
// (or i64:x, (zero_extend i32:y)) ->
// i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
RHS.getOpcode() != ISD::ZERO_EXTEND)
std::swap(LHS, RHS);
if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
SDValue ExtSrc = RHS.getOperand(0);
EVT SrcVT = ExtSrc.getValueType();
if (SrcVT == MVT::i32) {
SDLoc SL(N);
SDValue LowLHS, HiBits;
std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
DCI.AddToWorklist(LowOr.getNode());
DCI.AddToWorklist(HiBits.getNode());
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
LowOr, HiBits);
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
}
}
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (CRHS) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR,
N->getOperand(0), CRHS))
return Split;
}
return SDValue();
}
SDValue SITargetLowering::performXorCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue RV = reassociateScalarOps(N, DCI.DAG))
return RV;
EVT VT = N->getValueType(0);
if (VT != MVT::i64)
return SDValue();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
if (CRHS) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
return Split;
}
return SDValue();
}
SDValue SITargetLowering::performZeroExtendCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (!Subtarget->has16BitInsts() ||
DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
SDValue Src = N->getOperand(0);
if (Src.getValueType() != MVT::i16)
return SDValue();
return SDValue();
}
SDValue SITargetLowering::performSignExtendInRegCombine(SDNode *N,
DAGCombinerInfo &DCI)
const {
SDValue Src = N->getOperand(0);
auto *VTSign = cast<VTSDNode>(N->getOperand(1));
if (((Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE &&
VTSign->getVT() == MVT::i8) ||
(Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_USHORT &&
VTSign->getVT() == MVT::i16)) &&
Src.hasOneUse()) {
auto *M = cast<MemSDNode>(Src);
SDValue Ops[] = {
Src.getOperand(0), // Chain
Src.getOperand(1), // rsrc
Src.getOperand(2), // vindex
Src.getOperand(3), // voffset
Src.getOperand(4), // soffset
Src.getOperand(5), // offset
Src.getOperand(6),
Src.getOperand(7)
};
// replace with BUFFER_LOAD_BYTE/SHORT
SDVTList ResList = DCI.DAG.getVTList(MVT::i32,
Src.getOperand(0).getValueType());
unsigned Opc = (Src.getOpcode() == AMDGPUISD::BUFFER_LOAD_UBYTE) ?
AMDGPUISD::BUFFER_LOAD_BYTE : AMDGPUISD::BUFFER_LOAD_SHORT;
SDValue BufferLoadSignExt = DCI.DAG.getMemIntrinsicNode(Opc, SDLoc(N),
ResList,
Ops, M->getMemoryVT(),
M->getMemOperand());
return DCI.DAG.getMergeValues({BufferLoadSignExt,
BufferLoadSignExt.getValue(1)}, SDLoc(N));
}
return SDValue();
}
SDValue SITargetLowering::performClassCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue Mask = N->getOperand(1);
// fp_class x, 0 -> false
if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
if (CMask->isZero())
return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
if (N->getOperand(0).isUndef())
return DAG.getUNDEF(MVT::i1);
return SDValue();
}
SDValue SITargetLowering::performRcpCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
if (N0.isUndef())
return N0;
if (VT == MVT::f32 && (N0.getOpcode() == ISD::UINT_TO_FP ||
N0.getOpcode() == ISD::SINT_TO_FP)) {
return DCI.DAG.getNode(AMDGPUISD::RCP_IFLAG, SDLoc(N), VT, N0,
N->getFlags());
}
if ((VT == MVT::f32 || VT == MVT::f16) && N0.getOpcode() == ISD::FSQRT) {
return DCI.DAG.getNode(AMDGPUISD::RSQ, SDLoc(N), VT,
N0.getOperand(0), N->getFlags());
}
return AMDGPUTargetLowering::performRcpCombine(N, DCI);
}
bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth) const {
unsigned Opcode = Op.getOpcode();
if (Opcode == ISD::FCANONICALIZE)
return true;
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
auto F = CFP->getValueAPF();
if (F.isNaN() && F.isSignaling())
return false;
return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType());
}
// If source is a result of another standard FP operation it is already in
// canonical form.
if (MaxDepth == 0)
return false;
switch (Opcode) {
// These will flush denorms if required.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::FCEIL:
case ISD::FFLOOR:
case ISD::FMA:
case ISD::FMAD:
case ISD::FSQRT:
case ISD::FDIV:
case ISD::FREM:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMAD_FTZ:
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
case AMDGPUISD::FRACT:
case AMDGPUISD::LDEXP:
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return true;
// These can/will be lowered or combined as bit operations, so their inputs
// need to be checked recursively.
case ISD::FNEG:
case ISD::FABS:
case ISD::FCOPYSIGN:
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::FSIN:
case ISD::FCOS:
case ISD::FSINCOS:
return Op.getValueType().getScalarType() != MVT::f16;
case ISD::FMINNUM:
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
case AMDGPUISD::CLAMP:
case AMDGPUISD::FMED3:
case AMDGPUISD::FMAX3:
case AMDGPUISD::FMIN3: {
// FIXME: Shouldn't treat the generic operations differently based on these.
// However, we aren't really required to flush the result from
// minnum/maxnum.
// snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
denormalsEnabledForType(DAG, Op.getValueType()))
return true;
// Flushing may be required.
// On pre-GFX9 targets V_MIN_F32 and others do not flush denorms, so for
// such targets we need to check the inputs recursively.
// FIXME: Does this apply with clamp? It's implemented with max.
for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I) {
if (!isCanonicalized(DAG, Op.getOperand(I), MaxDepth - 1))
return false;
}
return true;
}
case ISD::SELECT: {
return isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1) &&
isCanonicalized(DAG, Op.getOperand(2), MaxDepth - 1);
}
case ISD::BUILD_VECTOR: {
for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
SDValue SrcOp = Op.getOperand(i);
if (!isCanonicalized(DAG, SrcOp, MaxDepth - 1))
return false;
}
return true;
}
case ISD::EXTRACT_VECTOR_ELT:
case ISD::EXTRACT_SUBVECTOR: {
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
}
case ISD::INSERT_VECTOR_ELT: {
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1) &&
isCanonicalized(DAG, Op.getOperand(1), MaxDepth - 1);
}
case ISD::UNDEF:
// Could be anything.
return false;
case ISD::BITCAST:
return isCanonicalized(DAG, Op.getOperand(0), MaxDepth - 1);
case ISD::TRUNCATE: {
// Hack around the mess we make when legalizing extract_vector_elt.
if (Op.getValueType() == MVT::i16) {
SDValue TruncSrc = Op.getOperand(0);
if (TruncSrc.getValueType() == MVT::i32 &&
TruncSrc.getOpcode() == ISD::BITCAST &&
TruncSrc.getOperand(0).getValueType() == MVT::v2f16) {
return isCanonicalized(DAG, TruncSrc.getOperand(0), MaxDepth - 1);
}
}
return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntrinsicID
= cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// TODO: Handle more intrinsics
switch (IntrinsicID) {
case Intrinsic::amdgcn_cvt_pkrtz:
case Intrinsic::amdgcn_cubeid:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rsq:
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_trig_preop:
return true;
default:
break;
}
LLVM_FALLTHROUGH;
}
default:
return denormalsEnabledForType(DAG, Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
}
llvm_unreachable("invalid operation");
}
bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
unsigned MaxDepth) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
MachineInstr *MI = MRI.getVRegDef(Reg);
unsigned Opcode = MI->getOpcode();
if (Opcode == AMDGPU::G_FCANONICALIZE)
return true;
Optional<FPValueAndVReg> FCR;
// Constant splat (can be padded with undef) or scalar constant.
if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {
if (FCR->Value.isSignaling())
return false;
return !FCR->Value.isDenormal() ||
denormalsEnabledForType(MRI.getType(FCR->VReg), MF);
}
if (MaxDepth == 0)
return false;
switch (Opcode) {
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE: {
if (Subtarget->supportsMinMaxDenormModes() ||
denormalsEnabledForType(MRI.getType(Reg), MF))
return true;
for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
if (!isCanonicalized(MO.getReg(), MF, MaxDepth - 1))
return false;
return true;
}
default:
return denormalsEnabledForType(MRI.getType(Reg), MF) &&
isKnownNeverSNaN(Reg, MRI);
}
llvm_unreachable("invalid operation");
}
// Constant fold canonicalize.
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
if (C.isDenormal() && !denormalsEnabledForType(DAG, VT))
return DAG.getConstantFP(0.0, SL, VT);
if (C.isNaN()) {
APFloat CanonicalQNaN = APFloat::getQNaN(C.getSemantics());
if (C.isSignaling()) {
// Quiet a signaling NaN.
// FIXME: Is this supposed to preserve payload bits?
return DAG.getConstantFP(CanonicalQNaN, SL, VT);
}
// Make sure it is the canonical NaN bitpattern.
//
// TODO: Can we use -1 as the canonical NaN value since it's an inline
// immediate?
if (C.bitcastToAPInt() != CanonicalQNaN.bitcastToAPInt())
return DAG.getConstantFP(CanonicalQNaN, SL, VT);
}
// Already canonical.
return DAG.getConstantFP(C, SL, VT);
}
static bool vectorEltWillFoldAway(SDValue Op) {
return Op.isUndef() || isa<ConstantFPSDNode>(Op);
}
SDValue SITargetLowering::performFCanonicalizeCombine(
SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
// fcanonicalize undef -> qnan
if (N0.isUndef()) {
APFloat QNaN = APFloat::getQNaN(SelectionDAG::EVTToAPFloatSemantics(VT));
return DAG.getConstantFP(QNaN, SDLoc(N), VT);
}
if (ConstantFPSDNode *CFP = isConstOrConstSplatFP(N0))
return getCanonicalConstantFP(DAG, SDLoc(N), VT, CFP->getValueAPF());
// fcanonicalize (build_vector x, k) -> build_vector (fcanonicalize x),
// (fcanonicalize k)
//
// fcanonicalize (build_vector x, undef) -> build_vector (fcanonicalize x), 0
// TODO: This could be better with wider vectors that will be split to v2f16,
// and to consider uses since there aren't that many packed operations.
if (N0.getOpcode() == ISD::BUILD_VECTOR && VT == MVT::v2f16 &&
isTypeLegal(MVT::v2f16)) {
SDLoc SL(N);
SDValue NewElts[2];
SDValue Lo = N0.getOperand(0);
SDValue Hi = N0.getOperand(1);
EVT EltVT = Lo.getValueType();
if (vectorEltWillFoldAway(Lo) || vectorEltWillFoldAway(Hi)) {
for (unsigned I = 0; I != 2; ++I) {
SDValue Op = N0.getOperand(I);
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
NewElts[I] = getCanonicalConstantFP(DAG, SL, EltVT,
CFP->getValueAPF());
} else if (Op.isUndef()) {
// Handled below based on what the other operand is.
NewElts[I] = Op;
} else {
NewElts[I] = DAG.getNode(ISD::FCANONICALIZE, SL, EltVT, Op);
}
}
// If one half is undef, and one is constant, prefer a splat vector rather
// than the normal qNaN. If it's a register, prefer 0.0 since that's
// cheaper to use and may be free with a packed operation.
if (NewElts[0].isUndef()) {
NewElts[0] = isa<ConstantFPSDNode>(NewElts[1]) ?
NewElts[1] : DAG.getConstantFP(0.0f, SL, EltVT);
}
if (NewElts[1].isUndef()) {
NewElts[1] = isa<ConstantFPSDNode>(NewElts[0]) ?
NewElts[0] : DAG.getConstantFP(0.0f, SL, EltVT);
}
return DAG.getBuildVector(VT, SL, NewElts);
}
}
unsigned SrcOpc = N0.getOpcode();
// If it's free to do so, push canonicalizes further up the source, which may
// find a canonical source.
//
// TODO: More opcodes. Note this is unsafe for the _ieee minnum/maxnum for
// sNaNs.
if (SrcOpc == ISD::FMINNUM || SrcOpc == ISD::FMAXNUM) {
auto *CRHS = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
if (CRHS && N0.hasOneUse()) {
SDLoc SL(N);
SDValue Canon0 = DAG.getNode(ISD::FCANONICALIZE, SL, VT,
N0.getOperand(0));
SDValue Canon1 = getCanonicalConstantFP(DAG, SL, VT, CRHS->getValueAPF());
DCI.AddToWorklist(Canon0.getNode());
return DAG.getNode(N0.getOpcode(), SL, VT, Canon0, Canon1);
}
}
return isCanonicalized(DAG, N0) ? N0 : SDValue();
}
static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
switch (Opc) {
case ISD::FMAXNUM:
case ISD::FMAXNUM_IEEE:
return AMDGPUISD::FMAX3;
case ISD::SMAX:
return AMDGPUISD::SMAX3;
case ISD::UMAX:
return AMDGPUISD::UMAX3;
case ISD::FMINNUM:
case ISD::FMINNUM_IEEE:
return AMDGPUISD::FMIN3;
case ISD::SMIN:
return AMDGPUISD::SMIN3;
case ISD::UMIN:
return AMDGPUISD::UMIN3;
default:
llvm_unreachable("Not a min/max opcode");
}
}
SDValue SITargetLowering::performIntMed3ImmCombine(
SelectionDAG &DAG, const SDLoc &SL,
SDValue Op0, SDValue Op1, bool Signed) const {
ConstantSDNode *K1 = dyn_cast<ConstantSDNode>(Op1);
if (!K1)
return SDValue();
ConstantSDNode *K0 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
if (!K0)
return SDValue();
if (Signed) {
if (K0->getAPIntValue().sge(K1->getAPIntValue()))
return SDValue();
} else {
if (K0->getAPIntValue().uge(K1->getAPIntValue()))
return SDValue();
}
EVT VT = K0->getValueType(0);
unsigned Med3Opc = Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3;
if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->hasMed3_16())) {
return DAG.getNode(Med3Opc, SL, VT,
Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
}
// If there isn't a 16-bit med3 operation, convert to 32-bit.
if (VT == MVT::i16) {
MVT NVT = MVT::i32;
unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
}
return SDValue();
}
static ConstantFPSDNode *getSplatConstantFP(SDValue Op) {
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return C;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op)) {
if (ConstantFPSDNode *C = BV->getConstantFPSplatNode())
return C;
}
return nullptr;
}
SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG,
const SDLoc &SL,
SDValue Op0,
SDValue Op1) const {
ConstantFPSDNode *K1 = getSplatConstantFP(Op1);
if (!K1)
return SDValue();
ConstantFPSDNode *K0 = getSplatConstantFP(Op0.getOperand(1));
if (!K0)
return SDValue();
// Ordered >= (although NaN inputs should have folded away by now).
if (K0->getValueAPF() > K1->getValueAPF())
return SDValue();
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// TODO: Check IEEE bit enabled?
EVT VT = Op0.getValueType();
if (Info->getMode().DX10Clamp) {
// If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the
// hardware fmed3 behavior converting to a min.
// FIXME: Should this be allowing -0.0?
if (K1->isExactlyValue(1.0) && K0->isExactlyValue(0.0))
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Op0.getOperand(0));
}
// med3 for f16 is only available on gfx9+, and not available for v2f16.
if (VT == MVT::f32 || (VT == MVT::f16 && Subtarget->hasMed3_16())) {
// This isn't safe with signaling NaNs because in IEEE mode, min/max on a
// signaling NaN gives a quiet NaN. The quiet NaN input to the min would
// then give the other result, which is different from med3 with a NaN
// input.
SDValue Var = Op0.getOperand(0);
if (!DAG.isKnownNeverSNaN(Var))
return SDValue();
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if ((!K0->hasOneUse() ||
TII->isInlineConstant(K0->getValueAPF().bitcastToAPInt())) &&
(!K1->hasOneUse() ||
TII->isInlineConstant(K1->getValueAPF().bitcastToAPInt()))) {
return DAG.getNode(AMDGPUISD::FMED3, SL, K0->getValueType(0),
Var, SDValue(K0, 0), SDValue(K1, 0));
}
}
return SDValue();
}
SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
// Only do this if the inner op has one use since this will just increase
// register pressure for no benefit.
if (Opc != AMDGPUISD::FMIN_LEGACY && Opc != AMDGPUISD::FMAX_LEGACY &&
!VT.isVector() &&
(VT == MVT::i32 || VT == MVT::f32 ||
((VT == MVT::f16 || VT == MVT::i16) && Subtarget->hasMin3Max3_16()))) {
// max(max(a, b), c) -> max3(a, b, c)
// min(min(a, b), c) -> min3(a, b, c)
if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
SDLoc DL(N);
return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
DL,
N->getValueType(0),
Op0.getOperand(0),
Op0.getOperand(1),
Op1);
}
// Try commuted.
// max(a, max(b, c)) -> max3(a, b, c)
// min(a, min(b, c)) -> min3(a, b, c)
if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
SDLoc DL(N);
return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
DL,
N->getValueType(0),
Op0,
Op1.getOperand(0),
Op1.getOperand(1));
}
}
// min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
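// For example, (smin (smax x, 5), 10) becomes (smed3 x, 5, 10), clamping x
// to [5, 10] in a single instruction.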
if (Opc == ISD::SMIN && Op0.getOpcode() == ISD::SMAX && Op0.hasOneUse()) {
if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, true))
return Med3;
}
if (Opc == ISD::UMIN && Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, false))
return Med3;
}
// fminnum(fmaxnum(x, K0), K1), K0 < K1 && !is_snan(x) -> fmed3(x, K0, K1)
if (((Opc == ISD::FMINNUM && Op0.getOpcode() == ISD::FMAXNUM) ||
(Opc == ISD::FMINNUM_IEEE && Op0.getOpcode() == ISD::FMAXNUM_IEEE) ||
(Opc == AMDGPUISD::FMIN_LEGACY &&
Op0.getOpcode() == AMDGPUISD::FMAX_LEGACY)) &&
(VT == MVT::f32 || VT == MVT::f64 ||
(VT == MVT::f16 && Subtarget->has16BitInsts()) ||
(VT == MVT::v2f16 && Subtarget->hasVOP3PInsts())) &&
Op0.hasOneUse()) {
if (SDValue Res = performFPMed3ImmCombine(DAG, SDLoc(N), Op0, Op1))
return Res;
}
return SDValue();
}
static bool isClampZeroToOne(SDValue A, SDValue B) {
if (ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) {
if (ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) {
// FIXME: Should this be allowing -0.0?
return (CA->isExactlyValue(0.0) && CB->isExactlyValue(1.0)) ||
(CA->isExactlyValue(1.0) && CB->isExactlyValue(0.0));
}
}
return false;
}
// FIXME: Should only worry about snans for version with chain.
SDValue SITargetLowering::performFMed3Combine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
// v_med3_f32 and v_max_f32 behave identically wrt denorms, exceptions and
// NaNs. With a NaN input, the order of the operands may change the result.
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue Src0 = N->getOperand(0);
SDValue Src1 = N->getOperand(1);
SDValue Src2 = N->getOperand(2);
if (isClampZeroToOne(Src0, Src1)) {
// const_a, const_b, x -> clamp is safe in all cases including signaling
// nans.
// FIXME: Should this be allowing -0.0?
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src2);
}
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
// FIXME: dx10_clamp behavior assumed in instcombine. Should we really bother
// handling no dx10-clamp?
if (Info->getMode().DX10Clamp) {
// If NaN is clamped to 0, we are free to reorder the inputs.
if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
std::swap(Src0, Src1);
if (isa<ConstantFPSDNode>(Src1) && !isa<ConstantFPSDNode>(Src2))
std::swap(Src1, Src2);
if (isa<ConstantFPSDNode>(Src0) && !isa<ConstantFPSDNode>(Src1))
std::swap(Src0, Src1);
if (isClampZeroToOne(Src1, Src2))
return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Src0);
}
return SDValue();
}
SDValue SITargetLowering::performCvtPkRTZCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SDValue Src0 = N->getOperand(0);
SDValue Src1 = N->getOperand(1);
if (Src0.isUndef() && Src1.isUndef())
return DCI.DAG.getUNDEF(N->getValueType(0));
return SDValue();
}
// Check if EXTRACT_VECTOR_ELT/INSERT_VECTOR_ELT (<n x e>, var-idx) should be
// expanded into a set of cmp/select instructions.
bool SITargetLowering::shouldExpandVectorDynExt(unsigned EltSize,
unsigned NumElem,
bool IsDivergentIdx,
const GCNSubtarget *Subtarget) {
if (UseDivergentRegisterIndexing)
return false;
unsigned VecSize = EltSize * NumElem;
// Sub-dword vectors with a total size of 2 dwords or less have a better
// implementation.
if (VecSize <= 64 && EltSize < 32)
return false;
// Always expand the remaining sub-dword cases, otherwise they will be
// lowered via memory.
if (EltSize < 32)
return true;
// Always do this if var-idx is divergent, otherwise it will become a loop.
if (IsDivergentIdx)
return true;
// Large vectors would yield too many compares and v_cndmask_b32 instructions.
unsigned NumInsts = NumElem /* Number of compares */ +
((EltSize + 31) / 32) * NumElem /* Number of cndmasks */;
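// For example, a v8i64 vector costs 8 compares plus 2 * 8 cndmasks,
// i.e. NumInsts == 24, which exceeds both limits below, so it is not
// expanded.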
// On some architectures (GFX9) movrel is not available and it's better
// to expand.
if (!Subtarget->hasMovrel())
return NumInsts <= 16;
// If movrel is available, use it instead of expanding for vectors of 8
// elements.
return NumInsts <= 15;
}
bool SITargetLowering::shouldExpandVectorDynExt(SDNode *N) const {
SDValue Idx = N->getOperand(N->getNumOperands() - 1);
if (isa<ConstantSDNode>(Idx))
return false;
SDValue Vec = N->getOperand(0);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltSize = EltVT.getSizeInBits();
unsigned NumElem = VecVT.getVectorNumElements();
return SITargetLowering::shouldExpandVectorDynExt(
EltSize, NumElem, Idx->isDivergent(), getSubtarget());
}
SDValue SITargetLowering::performExtractVectorEltCombine(
SDNode *N, DAGCombinerInfo &DCI) const {
SDValue Vec = N->getOperand(0);
SelectionDAG &DAG = DCI.DAG;
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
if ((Vec.getOpcode() == ISD::FNEG ||
Vec.getOpcode() == ISD::FABS) && allUsesHaveSourceMods(N)) {
SDLoc SL(N);
EVT EltVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(0), Idx);
return DAG.getNode(Vec.getOpcode(), SL, EltVT, Elt);
}
// ScalarRes = EXTRACT_VECTOR_ELT ((vector-BINOP Vec1, Vec2), Idx)
// =>
// Vec1Elt = EXTRACT_VECTOR_ELT(Vec1, Idx)
// Vec2Elt = EXTRACT_VECTOR_ELT(Vec2, Idx)
// ScalarRes = scalar-BINOP Vec1Elt, Vec2Elt
if (Vec.hasOneUse() && DCI.isBeforeLegalize()) {
SDLoc SL(N);
EVT EltVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
unsigned Opc = Vec.getOpcode();
switch(Opc) {
default:
break;
// TODO: Support other binary operations.
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
case ISD::ADD:
case ISD::UMIN:
case ISD::UMAX:
case ISD::SMIN:
case ISD::SMAX:
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE: {
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(0), Idx);
SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
Vec.getOperand(1), Idx);
DCI.AddToWorklist(Elt0.getNode());
DCI.AddToWorklist(Elt1.getNode());
return DAG.getNode(Opc, SL, EltVT, Elt0, Elt1, Vec->getFlags());
}
}
}
unsigned VecSize = VecVT.getSizeInBits();
unsigned EltSize = EltVT.getSizeInBits();
// EXTRACT_VECTOR_ELT (<n x e>, var-idx) => n x select (e, const-idx)
if (shouldExpandVectorDynExt(N)) {
SDLoc SL(N);
SDValue Idx = N->getOperand(1);
SDValue V;
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
SDValue IC = DAG.getVectorIdxConstant(I, SL);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
if (I == 0)
V = Elt;
else
V = DAG.getSelectCC(SL, Idx, IC, Elt, V, ISD::SETEQ);
}
return V;
}
if (!DCI.isBeforeLegalize())
return SDValue();
// Try to turn sub-dword accesses of vectors into accesses of the same 32-bit
// elements. This exposes more load reduction opportunities by replacing
// multiple small extract_vector_elements with a single 32-bit extract.
auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (isa<MemSDNode>(Vec) &&
EltSize <= 16 &&
EltVT.isByteSized() &&
VecSize > 32 &&
VecSize % 32 == 0 &&
Idx) {
EVT NewVT = getEquivalentMemType(*DAG.getContext(), VecVT);
unsigned BitIndex = Idx->getZExtValue() * EltSize;
unsigned EltIdx = BitIndex / 32;
unsigned LeftoverBitIdx = BitIndex % 32;
SDLoc SL(N);
SDValue Cast = DAG.getNode(ISD::BITCAST, SL, NewVT, Vec);
DCI.AddToWorklist(Cast.getNode());
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Cast,
DAG.getConstant(EltIdx, SL, MVT::i32));
DCI.AddToWorklist(Elt.getNode());
SDValue Srl = DAG.getNode(ISD::SRL, SL, MVT::i32, Elt,
DAG.getConstant(LeftoverBitIdx, SL, MVT::i32));
DCI.AddToWorklist(Srl.getNode());
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, EltVT.changeTypeToInteger(), Srl);
DCI.AddToWorklist(Trunc.getNode());
return DAG.getNode(ISD::BITCAST, SL, EltVT, Trunc);
}
return SDValue();
}
SDValue
SITargetLowering::performInsertVectorEltCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(2);
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
// INSERT_VECTOR_ELT (<n x e>, var-idx)
// => BUILD_VECTOR n x select (e, const-idx)
if (!shouldExpandVectorDynExt(N))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue Ins = N->getOperand(1);
EVT IdxVT = Idx.getValueType();
SmallVector<SDValue, 16> Ops;
for (unsigned I = 0, E = VecVT.getVectorNumElements(); I < E; ++I) {
SDValue IC = DAG.getConstant(I, SL, IdxVT);
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Vec, IC);
SDValue V = DAG.getSelectCC(SL, Idx, IC, Ins, Elt, ISD::SETEQ);
Ops.push_back(V);
}
return DAG.getBuildVector(VecVT, SL, Ops);
}
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0,
const SDNode *N1) const {
EVT VT = N0->getValueType(0);
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
(VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
getSubtarget()->hasMadF16())) &&
isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
const TargetOptions &Options = DAG.getTarget().Options;
if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N0->getFlags().hasAllowContract() &&
N1->getFlags().hasAllowContract())) &&
isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
return ISD::FMA;
}
return 0;
}
// For a reassociatable opcode perform:
// op x, (op y, z) -> op (op x, z), y, if x and z are uniform
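// For example, with x and z uniform and y divergent,
// (add x, (add y, z)) -> (add (add x, z), y) keeps one addition on the
// scalar unit.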
SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
SelectionDAG &DAG) const {
EVT VT = N->getValueType(0);
if (VT != MVT::i32 && VT != MVT::i64)
return SDValue();
if (DAG.isBaseWithConstantOffset(SDValue(N, 0)))
return SDValue();
unsigned Opc = N->getOpcode();
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
if (!(Op0->isDivergent() ^ Op1->isDivergent()))
return SDValue();
if (Op0->isDivergent())
std::swap(Op0, Op1);
if (Op1.getOpcode() != Opc || !Op1.hasOneUse())
return SDValue();
SDValue Op2 = Op1.getOperand(1);
Op1 = Op1.getOperand(0);
if (!(Op1->isDivergent() ^ Op2->isDivergent()))
return SDValue();
if (Op1->isDivergent())
std::swap(Op1, Op2);
SDLoc SL(N);
SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1);
return DAG.getNode(Opc, SL, VT, Add1, Op2);
}
static SDValue getMad64_32(SelectionDAG &DAG, const SDLoc &SL,
EVT VT,
SDValue N0, SDValue N1, SDValue N2,
bool Signed) {
unsigned MadOpc = Signed ? AMDGPUISD::MAD_I64_I32 : AMDGPUISD::MAD_U64_U32;
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i1);
SDValue Mad = DAG.getNode(MadOpc, SL, VTs, N0, N1, N2);
return DAG.getNode(ISD::TRUNCATE, SL, VT, Mad);
}
// Fold (add (mul x, y), z) --> (mad_[iu]64_[iu]32 x, y, z) plus high
// multiplies, if any.
//
// Full 64-bit multiplies that feed into an addition are lowered here instead
// of using the generic expansion. The generic expansion ends up with
// a tree of ADD nodes that prevents us from using the "add" part of the
// MAD instruction. The expansion produced here results in a chain of ADDs
// instead of a tree.
SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::ADD);
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (VT.isVector())
return SDValue();
// S_MUL_HI_[IU]32 was added in gfx9, which allows us to keep the overall
// result in scalar registers for uniform values.
if (!N->isDivergent() && Subtarget->hasSMulHi())
return SDValue();
unsigned NumBits = VT.getScalarSizeInBits();
if (NumBits <= 32 || NumBits > 64)
return SDValue();
if (LHS.getOpcode() != ISD::MUL) {
assert(RHS.getOpcode() == ISD::MUL);
std::swap(LHS, RHS);
}
// Avoid the fold if it would unduly increase the number of multiplies due to
// multiple uses, except on hardware with full-rate multiply-add (which is
// part of full-rate 64-bit ops).
if (!Subtarget->hasFullRate64Ops()) {
unsigned NumUsers = 0;
for (SDNode *Use : LHS->uses()) {
// There is a use that does not feed into addition, so the multiply can't
// be removed. We prefer MUL + ADD + ADDC over MAD + MUL.
if (Use->getOpcode() != ISD::ADD)
return SDValue();
// We prefer 2xMAD over MUL + 2xADD + 2xADDC (code density), and prefer
// MUL + 3xADD + 3xADDC over 3xMAD.
++NumUsers;
if (NumUsers >= 3)
return SDValue();
}
}
SDValue MulLHS = LHS.getOperand(0);
SDValue MulRHS = LHS.getOperand(1);
SDValue AddRHS = RHS;
// Always check whether operands are small unsigned values, since that
// knowledge is useful in more cases. Check for small signed values only if
// doing so can unlock a shorter code sequence.
bool MulLHSUnsigned32 = numBitsUnsigned(MulLHS, DAG) <= 32;
bool MulRHSUnsigned32 = numBitsUnsigned(MulRHS, DAG) <= 32;
bool MulSignedLo = false;
if (!MulLHSUnsigned32 || !MulRHSUnsigned32) {
MulSignedLo = numBitsSigned(MulLHS, DAG) <= 32 &&
numBitsSigned(MulRHS, DAG) <= 32;
}
// The operands and final result all have the same number of bits. If
// operands need to be extended, they can be extended with garbage. The
// resulting garbage in the high bits of the mad_[iu]64_[iu]32 result is
// truncated away in the end.
if (VT != MVT::i64) {
MulLHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulLHS);
MulRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, MulRHS);
AddRHS = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i64, AddRHS);
}
// The basic code generated is conceptually straightforward. Pseudo code:
//
// accum = mad_64_32 lhs.lo, rhs.lo, accum
// accum.hi = add (mul lhs.hi, rhs.lo), accum.hi
// accum.hi = add (mul lhs.lo, rhs.hi), accum.hi
//
// The second and third lines are optional, depending on whether the factors
// are {sign,zero}-extended or not.
//
// The actual DAG is noisier than the pseudo code, but only due to
// instructions that disassemble values into low and high parts, and
// assemble the final result.
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
SDValue One = DAG.getConstant(1, SL, MVT::i32);
auto MulLHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulLHS);
auto MulRHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulRHS);
SDValue Accum =
getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
auto AccumLo = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, Zero);
auto AccumHi = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, One);
if (!MulLHSUnsigned32) {
auto MulLHSHi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulLHS, One);
SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSHi, MulRHSLo);
AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi);
}
if (!MulRHSUnsigned32) {
auto MulRHSHi =
DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, MulRHS, One);
SDValue MulHi = DAG.getNode(ISD::MUL, SL, MVT::i32, MulLHSLo, MulRHSHi);
AccumHi = DAG.getNode(ISD::ADD, SL, MVT::i32, MulHi, AccumHi);
}
Accum = DAG.getBuildVector(MVT::v2i32, SL, {AccumLo, AccumHi});
Accum = DAG.getBitcast(MVT::i64, Accum);
}
if (VT != MVT::i64)
Accum = DAG.getNode(ISD::TRUNCATE, SL, VT, Accum);
return Accum;
}
SDValue SITargetLowering::performAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() == ISD::MUL || RHS.getOpcode() == ISD::MUL) {
if (Subtarget->hasMad64_32()) {
if (SDValue Folded = tryFoldToMad64_32(N, DCI))
return Folded;
}
return SDValue();
}
if (SDValue V = reassociateScalarOps(N, DAG)) {
return V;
}
if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
return SDValue();
// add x, zext (setcc) => addcarry x, 0, setcc
// add x, sext (setcc) => subcarry x, 0, setcc
unsigned Opc = LHS.getOpcode();
if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
Opc == ISD::ANY_EXTEND || Opc == ISD::ADDCARRY)
std::swap(RHS, LHS);
Opc = RHS.getOpcode();
switch (Opc) {
default: break;
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
// If this won't be a real VOPC output, we would still need to insert an
// extra instruction anyway.
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
case ISD::ADDCARRY: {
// add x, (addcarry y, 0, cc) => addcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
if (!C || C->getZExtValue() != 0) break;
SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) };
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), RHS->getVTList(), Args);
}
}
return SDValue();
}
SDValue SITargetLowering::performSubCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
if (VT != MVT::i32)
return SDValue();
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// sub x, zext (setcc) => subcarry x, 0, setcc
// sub x, sext (setcc) => addcarry x, 0, setcc
unsigned Opc = RHS.getOpcode();
switch (Opc) {
default: break;
case ISD::ZERO_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
// If this won't be a real VOPC output, we would still need to insert an
// extra instruction anyway.
if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
}
if (LHS.getOpcode() == ISD::SUBCARRY) {
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
if (!C || !C->isZero())
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
}
return SDValue();
}
SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (N->getValueType(0) != MVT::i32)
return SDValue();
auto C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!C || C->getZExtValue() != 0)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
// addcarry (add x, y), 0, cc => addcarry x, y, cc
// subcarry (sub x, y), 0, cc => subcarry x, y, cc
unsigned LHSOpc = LHS.getOpcode();
unsigned Opc = N->getOpcode();
if ((LHSOpc == ISD::ADD && Opc == ISD::ADDCARRY) ||
(LHSOpc == ISD::SUB && Opc == ISD::SUBCARRY)) {
SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) };
return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args);
}
return SDValue();
}
SDValue SITargetLowering::performFAddCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
// These should really be instruction patterns, but writing patterns with
// source modifiers is a pain.
// fadd (fadd (a, a), b) -> mad 2.0, a, b
if (LHS.getOpcode() == ISD::FADD) {
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
if (FusedOp != 0) {
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
return DAG.getNode(FusedOp, SL, VT, A, Two, RHS);
}
}
}
// fadd (b, fadd (a, a)) -> mad 2.0, a, b
if (RHS.getOpcode() == ISD::FADD) {
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
if (FusedOp != 0) {
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
return DAG.getNode(FusedOp, SL, VT, A, Two, LHS);
}
}
}
return SDValue();
}
SDValue SITargetLowering::performFSubCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
EVT VT = N->getValueType(0);
assert(!VT.isVector());
// Try to get the fneg to fold into the source modifier. This undoes generic
// DAG combines and folds them into the mad.
//
// Only do this if we are not trying to support denormals. v_mad_f32 does
// not support denormals ever.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (LHS.getOpcode() == ISD::FADD) {
// (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
SDValue A = LHS.getOperand(0);
if (A == LHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
if (FusedOp != 0){
const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
return DAG.getNode(FusedOp, SL, VT, A, Two, NegRHS);
}
}
}
if (RHS.getOpcode() == ISD::FADD) {
// (fsub c, (fadd a, a)) -> mad -2.0, a, c
SDValue A = RHS.getOperand(0);
if (A == RHS.getOperand(1)) {
unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
if (FusedOp != 0){
const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
return DAG.getNode(FusedOp, SL, VT, A, NegTwo, LHS);
}
}
}
return SDValue();
}
SDValue SITargetLowering::performFMACombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
SDLoc SL(N);
if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
// FDOT2((V2F16)S0, (V2F16)S1, (F32)z)
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
SDValue FMA = N->getOperand(2);
if (FMA.getOpcode() != ISD::FMA ||
Op1.getOpcode() != ISD::FP_EXTEND ||
Op2.getOpcode() != ISD::FP_EXTEND)
return SDValue();
// fdot2_f32_f16 always flushes fp32 denormal operands and the output to
// zero, regardless of the denorm mode setting. Therefore,
// unsafe-fp-math/fp-contract is sufficient to allow generating fdot2.
const TargetOptions &Options = DAG.getTarget().Options;
if (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
(N->getFlags().hasAllowContract() &&
FMA->getFlags().hasAllowContract())) {
Op1 = Op1.getOperand(0);
Op2 = Op2.getOperand(0);
if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Op2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue Vec1 = Op1.getOperand(0);
SDValue Idx1 = Op1.getOperand(1);
SDValue Vec2 = Op2.getOperand(0);
SDValue FMAOp1 = FMA.getOperand(0);
SDValue FMAOp2 = FMA.getOperand(1);
SDValue FMAAcc = FMA.getOperand(2);
if (FMAOp1.getOpcode() != ISD::FP_EXTEND ||
FMAOp2.getOpcode() != ISD::FP_EXTEND)
return SDValue();
FMAOp1 = FMAOp1.getOperand(0);
FMAOp2 = FMAOp2.getOperand(0);
if (FMAOp1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
FMAOp2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
SDValue Vec3 = FMAOp1.getOperand(0);
SDValue Vec4 = FMAOp2.getOperand(0);
SDValue Idx2 = FMAOp1.getOperand(1);
if (Idx1 != Op2.getOperand(1) || Idx2 != FMAOp2.getOperand(1) ||
// Idx1 and Idx2 cannot be the same.
Idx1 == Idx2)
return SDValue();
if (Vec1 == Vec2 || Vec3 == Vec4)
return SDValue();
if (Vec1.getValueType() != MVT::v2f16 || Vec2.getValueType() != MVT::v2f16)
return SDValue();
if ((Vec1 == Vec3 && Vec2 == Vec4) ||
(Vec1 == Vec4 && Vec2 == Vec3)) {
return DAG.getNode(AMDGPUISD::FDOT2, SL, MVT::f32, Vec1, Vec2, FMAAcc,
DAG.getTargetConstant(0, SL, MVT::i1));
}
}
return SDValue();
}
SDValue SITargetLowering::performSetCCCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
EVT VT = LHS.getValueType();
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
auto CRHS = dyn_cast<ConstantSDNode>(RHS);
if (!CRHS) {
CRHS = dyn_cast<ConstantSDNode>(LHS);
if (CRHS) {
std::swap(LHS, RHS);
CC = getSetCCSwappedOperands(CC);
}
}
if (CRHS) {
if (VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
isBoolSGPR(LHS.getOperand(0))) {
// setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
// setcc (sext from i1 cc), -1, eq|sle|uge) => cc
// setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
// setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
if ((CRHS->isAllOnes() &&
(CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
(CRHS->isZero() &&
(CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
if ((CRHS->isAllOnes() &&
(CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
(CRHS->isZero() &&
(CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
return LHS.getOperand(0);
}
const APInt &CRHSVal = CRHS->getAPIntValue();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
LHS.getOpcode() == ISD::SELECT &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
isa<ConstantSDNode>(LHS.getOperand(2)) &&
LHS.getConstantOperandVal(1) != LHS.getConstantOperandVal(2) &&
isBoolSGPR(LHS.getOperand(0))) {
// Given CT != CF:
// setcc (select cc, CT, CF), CF, eq => xor cc, -1
// setcc (select cc, CT, CF), CF, ne => cc
// setcc (select cc, CT, CF), CT, ne => xor cc, -1
// setcc (select cc, CT, CF), CT, eq => cc
const APInt &CT = LHS.getConstantOperandAPInt(1);
const APInt &CF = LHS.getConstantOperandAPInt(2);
if ((CF == CRHSVal && CC == ISD::SETEQ) ||
(CT == CRHSVal && CC == ISD::SETNE))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
if ((CF == CRHSVal && CC == ISD::SETNE) ||
(CT == CRHSVal && CC == ISD::SETEQ))
return LHS.getOperand(0);
}
}
if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
VT != MVT::f16))
return SDValue();
// Match isinf/isfinite pattern
// (fcmp oeq (fabs x), inf) -> (fp_class x, (p_infinity | n_infinity))
// (fcmp one (fabs x), inf) -> (fp_class x,
// (p_normal | n_normal | p_subnormal | n_subnormal | p_zero | n_zero))
if ((CC == ISD::SETOEQ || CC == ISD::SETONE) && LHS.getOpcode() == ISD::FABS) {
const ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
if (!CRHS)
return SDValue();
const APFloat &APF = CRHS->getValueAPF();
if (APF.isInfinity() && !APF.isNegative()) {
const unsigned IsInfMask = SIInstrFlags::P_INFINITY |
SIInstrFlags::N_INFINITY;
const unsigned IsFiniteMask = SIInstrFlags::N_ZERO |
SIInstrFlags::P_ZERO |
SIInstrFlags::N_NORMAL |
SIInstrFlags::P_NORMAL |
SIInstrFlags::N_SUBNORMAL |
SIInstrFlags::P_SUBNORMAL;
unsigned Mask = CC == ISD::SETOEQ ? IsInfMask : IsFiniteMask;
return DAG.getNode(AMDGPUISD::FP_CLASS, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(Mask, SL, MVT::i32));
}
}
return SDValue();
}
SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
SDValue Src = N->getOperand(0);
SDValue Shift = Src;
// TODO: Extend type shouldn't matter (assuming legal types).
if (Shift.getOpcode() == ISD::ZERO_EXTEND)
Shift = Shift.getOperand(0);
if (Shift.getOpcode() == ISD::SRL || Shift.getOpcode() == ISD::SHL) {
// cvt_f32_ubyte1 (shl x, 8) -> cvt_f32_ubyte0 x
// cvt_f32_ubyte3 (shl x, 16) -> cvt_f32_ubyte1 x
// cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
// cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
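// For example, (cvt_f32_ubyte0 (srl x, 24)) has Offset == 0 and
// ShiftOffset == 24, so it becomes (cvt_f32_ubyte3 x).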
if (auto *C = dyn_cast<ConstantSDNode>(Shift.getOperand(1))) {
SDValue Shifted = DAG.getZExtOrTrunc(Shift.getOperand(0),
SDLoc(Shift.getOperand(0)), MVT::i32);
unsigned ShiftOffset = 8 * Offset;
if (Shift.getOpcode() == ISD::SHL)
ShiftOffset -= C->getZExtValue();
else
ShiftOffset += C->getZExtValue();
if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + ShiftOffset / 8, SL,
MVT::f32, Shifted);
}
}
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedBits = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
if (TLI.SimplifyDemandedBits(Src, DemandedBits, DCI)) {
// We simplified Src. If this node is not dead, visit it again so it is
// folded properly.
if (N->getOpcode() != ISD::DELETED_NODE)
DCI.AddToWorklist(N);
return SDValue(N, 0);
}
// Handle (or x, (srl y, 8)) pattern when known bits are zero.
if (SDValue DemandedSrc =
TLI.SimplifyMultipleUseDemandedBits(Src, DemandedBits, DAG))
return DAG.getNode(N->getOpcode(), SL, MVT::f32, DemandedSrc);
return SDValue();
}
SDValue SITargetLowering::performClampCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
ConstantFPSDNode *CSrc = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
if (!CSrc)
return SDValue();
const MachineFunction &MF = DCI.DAG.getMachineFunction();
const APFloat &F = CSrc->getValueAPF();
APFloat Zero = APFloat::getZero(F.getSemantics());
if (F < Zero ||
(F.isNaN() && MF.getInfo<SIMachineFunctionInfo>()->getMode().DX10Clamp)) {
return DCI.DAG.getConstantFP(Zero, SDLoc(N), N->getValueType(0));
}
APFloat One(F.getSemantics(), "1.0");
if (F > One)
return DCI.DAG.getConstantFP(One, SDLoc(N), N->getValueType(0));
return SDValue(CSrc, 0);
}
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return SDValue();
switch (N->getOpcode()) {
case ISD::ADD:
return performAddCombine(N, DCI);
case ISD::SUB:
return performSubCombine(N, DCI);
case ISD::ADDCARRY:
case ISD::SUBCARRY:
return performAddCarrySubCarryCombine(N, DCI);
case ISD::FADD:
return performFAddCombine(N, DCI);
case ISD::FSUB:
return performFSubCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
case ISD::FMINNUM:
case ISD::FMAXNUM_IEEE:
case ISD::FMINNUM_IEEE:
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN:
case AMDGPUISD::FMIN_LEGACY:
case AMDGPUISD::FMAX_LEGACY:
return performMinMaxCombine(N, DCI);
case ISD::FMA:
return performFMACombine(N, DCI);
case ISD::AND:
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
case ISD::XOR:
return performXorCombine(N, DCI);
case ISD::ZERO_EXTEND:
return performZeroExtendCombine(N, DCI);
case ISD::SIGN_EXTEND_INREG:
return performSignExtendInRegCombine(N , DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::RCP:
return performRcpCombine(N, DCI);
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(0);
if (Src.isUndef())
return Src;
break;
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performUCharToFloatCombine(N, DCI);
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:
case AMDGPUISD::CVT_F32_UBYTE3:
return performCvtF32UByteNCombine(N, DCI);
case AMDGPUISD::FMED3:
return performFMed3Combine(N, DCI);
case AMDGPUISD::CVT_PKRTZ_F16_F32:
return performCvtPkRTZCombine(N, DCI);
case AMDGPUISD::CLAMP:
return performClampCombine(N, DCI);
case ISD::SCALAR_TO_VECTOR: {
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
// v2i16 (scalar_to_vector i16:x) -> v2i16 (bitcast (any_extend i16:x))
if (VT == MVT::v2i16 || VT == MVT::v2f16) {
SDLoc SL(N);
SDValue Src = N->getOperand(0);
EVT EltVT = Src.getValueType();
if (EltVT == MVT::f16)
Src = DAG.getNode(ISD::BITCAST, SL, MVT::i16, Src);
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Src);
return DAG.getNode(ISD::BITCAST, SL, VT, Ext);
}
break;
}
case ISD::EXTRACT_VECTOR_ELT:
return performExtractVectorEltCombine(N, DCI);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::LOAD: {
if (SDValue Widened = widenLoad(cast<LoadSDNode>(N), DCI))
return Widened;
LLVM_FALLTHROUGH;
}
default: {
if (!DCI.isBeforeLegalize()) {
if (MemSDNode *MemNode = dyn_cast<MemSDNode>(N))
return performMemSDNodeCombine(MemNode, DCI);
}
break;
}
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
/// Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
default: return ~0u;
case AMDGPU::sub0: return 0;
case AMDGPU::sub1: return 1;
case AMDGPU::sub2: return 2;
case AMDGPU::sub3: return 3;
case AMDGPU::sub4: return 4; // Possible with TFE/LWE
}
}
/// Adjust the writemask of MIMG instructions
SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
SelectionDAG &DAG) const {
unsigned Opcode = Node->getMachineOpcode();
// Subtract 1 because the vdata output is not a MachineSDNode operand.
int D16Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::d16) - 1;
if (D16Idx >= 0 && Node->getConstantOperandVal(D16Idx))
return Node; // not implemented for D16
SDNode *Users[5] = { nullptr };
unsigned Lane = 0;
unsigned DmaskIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) - 1;
unsigned OldDmask = Node->getConstantOperandVal(DmaskIdx);
unsigned NewDmask = 0;
unsigned TFEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::tfe) - 1;
unsigned LWEIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::lwe) - 1;
bool UsesTFC = (int(TFEIdx) >= 0 && Node->getConstantOperandVal(TFEIdx)) ||
Node->getConstantOperandVal(LWEIdx) != 0;
unsigned TFCLane = 0;
bool HasChain = Node->getNumValues() > 1;
if (OldDmask == 0) {
// These are folded out, but on the off chance it happens don't assert.
return Node;
}
unsigned OldBitsSet = countPopulation(OldDmask);
// Work out which is the TFE/LWE lane if that is enabled.
if (UsesTFC) {
TFCLane = OldBitsSet;
}
// Try to figure out the used register components
for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
I != E; ++I) {
// Don't look at users of the chain.
if (I.getUse().getResNo() != 0)
continue;
// Abort if we can't understand the usage
if (!I->isMachineOpcode() ||
I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
return Node;
// Lane means which subreg of %vgpra_vgprb_vgprc_vgprd is used.
// Note that subregs are packed, i.e. Lane==0 is the first bit set
// in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
// set, etc.
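// Illustrative example (not from the source): with OldDmask == 0b1010, Lane 0
// (sub0) holds component Y and Lane 1 (sub1) holds component W.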
Lane = SubIdx2Lane(I->getConstantOperandVal(1));
if (Lane == ~0u)
return Node;
// Check if the use is for the TFE/LWE generated result at VGPRn+1.
if (UsesTFC && Lane == TFCLane) {
Users[Lane] = *I;
} else {
// Set which texture component corresponds to the lane.
unsigned Comp;
for (unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
Comp = countTrailingZeros(Dmask);
Dmask &= ~(1 << Comp);
}
// Abort if we have more than one user per component.
if (Users[Lane])
return Node;
Users[Lane] = *I;
NewDmask |= 1 << Comp;
}
}
// Don't allow 0 dmask, as hardware assumes one channel enabled.
bool NoChannels = !NewDmask;
if (NoChannels) {
if (!UsesTFC) {
// No uses of the result and not using TFC. Then do nothing.
return Node;
}
// If the original dmask has one channel - then nothing to do
if (OldBitsSet == 1)
return Node;
// Use an arbitrary dmask - required for the instruction to work
NewDmask = 1;
}
// Abort if there's no change
if (NewDmask == OldDmask)
return Node;
unsigned BitsSet = countPopulation(NewDmask);
// Check for TFE or LWE - increase the number of channels by one to account
// for the extra return value
// This will need adjustment for D16 if this is also included in
// adjustWriteMask (this function) but at present D16 are excluded.
unsigned NewChannels = BitsSet + UsesTFC;
int NewOpcode =
AMDGPU::getMaskedMIMGOp(Node->getMachineOpcode(), NewChannels);
assert(NewOpcode != -1 &&
NewOpcode != static_cast<int>(Node->getMachineOpcode()) &&
"failed to find equivalent MIMG op");
// Adjust the writemask in the node
SmallVector<SDValue, 12> Ops;
Ops.insert(Ops.end(), Node->op_begin(), Node->op_begin() + DmaskIdx);
Ops.push_back(DAG.getTargetConstant(NewDmask, SDLoc(Node), MVT::i32));
Ops.insert(Ops.end(), Node->op_begin() + DmaskIdx + 1, Node->op_end());
MVT SVT = Node->getValueType(0).getVectorElementType().getSimpleVT();
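// There is no 3- or 5-wide register tuple here, so round the result type up
// (3 -> v4, 5 -> v8). Illustrative example: two dmask bits plus TFE give
// NewChannels == 3, which is widened to a v4 result.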
MVT ResultVT = NewChannels == 1 ?
SVT : MVT::getVectorVT(SVT, NewChannels == 3 ? 4 :
NewChannels == 5 ? 8 : NewChannels);
SDVTList NewVTList = HasChain ?
DAG.getVTList(ResultVT, MVT::Other) : DAG.getVTList(ResultVT);
MachineSDNode *NewNode = DAG.getMachineNode(NewOpcode, SDLoc(Node),
NewVTList, Ops);
if (HasChain) {
// Update chain.
DAG.setNodeMemRefs(NewNode, Node->memoperands());
DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), SDValue(NewNode, 1));
}
if (NewChannels == 1) {
assert(Node->hasNUsesOfValue(1, 0));
SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY,
SDLoc(Node), Users[Lane]->getValueType(0),
SDValue(NewNode, 0));
DAG.ReplaceAllUsesWith(Users[Lane], Copy);
return nullptr;
}
// Update the users of the node with the new indices
for (unsigned i = 0, Idx = AMDGPU::sub0; i < 5; ++i) {
SDNode *User = Users[i];
if (!User) {
// Handle the special case of NoChannels. We set NewDmask to 1 above, but
// Users[0] is still nullptr because channel 0 doesn't really have a use.
if (i || !NoChannels)
continue;
} else {
SDValue Op = DAG.getTargetConstant(Idx, SDLoc(User), MVT::i32);
DAG.UpdateNodeOperands(User, SDValue(NewNode, 0), Op);
}
switch (Idx) {
default: break;
case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
case AMDGPU::sub3: Idx = AMDGPU::sub4; break;
}
}
DAG.RemoveDeadNode(Node);
return nullptr;
}
static bool isFrameIndexOp(SDValue Op) {
if (Op.getOpcode() == ISD::AssertZext)
Op = Op.getOperand(0);
return isa<FrameIndexSDNode>(Op);
}
/// Legalize target-independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs to these instructions are registers.
SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
SelectionDAG &DAG) const {
if (Node->getOpcode() == ISD::CopyToReg) {
RegisterSDNode *DestReg = cast<RegisterSDNode>(Node->getOperand(1));
SDValue SrcVal = Node->getOperand(2);
// Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have
// to try understanding copies to physical registers.
if (SrcVal.getValueType() == MVT::i1 && DestReg->getReg().isPhysical()) {
SDLoc SL(Node);
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue VReg = DAG.getRegister(
MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1);
SDNode *Glued = Node->getGluedNode();
SDValue ToVReg
= DAG.getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal,
SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0));
SDValue ToResultReg
= DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0),
VReg, ToVReg.getValue(1));
DAG.ReplaceAllUsesWith(Node, ToResultReg.getNode());
DAG.RemoveDeadNode(Node);
return ToResultReg.getNode();
}
}
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}
SDLoc DL(Node);
Ops.push_back(SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL,
Node->getOperand(i).getValueType(),
Node->getOperand(i)), 0));
}
return DAG.UpdateNodeOperands(Node, Ops);
}
/// Fold the instructions after selecting them.
/// Returns null if users were already updated.
SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
unsigned Opcode = Node->getMachineOpcode();
if (TII->isMIMG(Opcode) && !TII->get(Opcode).mayStore() &&
!TII->isGather4(Opcode) &&
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::dmask) != -1) {
return adjustWritemask(Node, DAG);
}
if (Opcode == AMDGPU::INSERT_SUBREG ||
Opcode == AMDGPU::REG_SEQUENCE) {
legalizeTargetIndependentNode(Node, DAG);
return Node;
}
switch (Opcode) {
case AMDGPU::V_DIV_SCALE_F32_e64:
case AMDGPU::V_DIV_SCALE_F64_e64: {
// Satisfy the operand register constraint when one of the inputs is
// undefined. Ordinarily each undef value will have its own implicit_def of
// a vreg, so force these to use a single register.
SDValue Src0 = Node->getOperand(1);
SDValue Src1 = Node->getOperand(3);
SDValue Src2 = Node->getOperand(5);
if ((Src0.isMachineOpcode() &&
Src0.getMachineOpcode() != AMDGPU::IMPLICIT_DEF) &&
(Src0 == Src1 || Src0 == Src2))
break;
MVT VT = Src0.getValueType().getSimpleVT();
const TargetRegisterClass *RC =
getRegClassFor(VT, Src0.getNode()->isDivergent());
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
SDValue UndefReg = DAG.getRegister(MRI.createVirtualRegister(RC), VT);
SDValue ImpDef = DAG.getCopyToReg(DAG.getEntryNode(), SDLoc(Node),
UndefReg, Src0, SDValue());
// src0 must be the same register as src1 or src2, even if the value is
// undefined, so make sure we don't violate this constraint.
if (Src0.isMachineOpcode() &&
Src0.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) {
if (Src1.isMachineOpcode() &&
Src1.getMachineOpcode() != AMDGPU::IMPLICIT_DEF)
Src0 = Src1;
else if (Src2.isMachineOpcode() &&
Src2.getMachineOpcode() != AMDGPU::IMPLICIT_DEF)
Src0 = Src2;
else {
assert(Src1.getMachineOpcode() == AMDGPU::IMPLICIT_DEF);
Src0 = UndefReg;
Src1 = UndefReg;
}
} else
break;
SmallVector<SDValue, 9> Ops(Node->op_begin(), Node->op_end());
Ops[1] = Src0;
Ops[3] = Src1;
Ops[5] = Src2;
Ops.push_back(ImpDef.getValue(1));
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
default:
break;
}
return Node;
}
// Any MIMG instruction that uses tfe or lwe requires an initialization of the
// result register that will be written in the case of a memory access failure.
// The required code is also added to tie this init code to the result of the
// image instruction.
void SITargetLowering::AddIMGInit(MachineInstr &MI) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
MachineBasicBlock &MBB = *MI.getParent();
MachineOperand *TFE = TII->getNamedOperand(MI, AMDGPU::OpName::tfe);
MachineOperand *LWE = TII->getNamedOperand(MI, AMDGPU::OpName::lwe);
MachineOperand *D16 = TII->getNamedOperand(MI, AMDGPU::OpName::d16);
if (!TFE && !LWE) // intersect_ray
return;
unsigned TFEVal = TFE ? TFE->getImm() : 0;
unsigned LWEVal = LWE ? LWE->getImm() : 0;
unsigned D16Val = D16 ? D16->getImm() : 0;
if (!TFEVal && !LWEVal)
return;
// At least one of TFE or LWE are non-zero
// We have to insert a suitable initialization of the result value and
// tie this to the dest of the image instruction.
const DebugLoc &DL = MI.getDebugLoc();
int DstIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata);
// Calculate which dword we have to initialize to 0.
MachineOperand *MO_Dmask = TII->getNamedOperand(MI, AMDGPU::OpName::dmask);
// check that dmask operand is found.
assert(MO_Dmask && "Expected dmask operand in instruction");
unsigned dmask = MO_Dmask->getImm();
// Determine the number of active lanes taking into account the
// Gather4 special case
unsigned ActiveLanes = TII->isGather4(MI) ? 4 : countPopulation(dmask);
bool Packed = !Subtarget->hasUnpackedD16VMem();
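// InitIdx counts the result dwords to initialize, including the TFE/LWE
// status dword. Illustrative example: dmask == 0b0111 gives ActiveLanes == 3,
// hence InitIdx == 4, or ((3 + 1) >> 1) + 1 == 3 if D16 data is packed two
// lanes per dword.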
unsigned InitIdx =
D16Val && Packed ? ((ActiveLanes + 1) >> 1) + 1 : ActiveLanes + 1;
// Abandon attempt if the dst size isn't large enough
// - this is in fact an error but this is picked up elsewhere and
// reported correctly.
uint32_t DstSize = TRI.getRegSizeInBits(*TII->getOpRegClass(MI, DstIdx)) / 32;
if (DstSize < InitIdx)
return;
// Create a register for the initialization value.
Register PrevDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
unsigned NewDst = 0; // Final initialized value will be in here
// If PRTStrictNull feature is enabled (the default) then initialize
// all the result registers to 0, otherwise just the error indication
// register (VGPRn+1)
unsigned SizeLeft = Subtarget->usePRTStrictNull() ? InitIdx : 1;
unsigned CurrIdx = Subtarget->usePRTStrictNull() ? 0 : (InitIdx - 1);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::IMPLICIT_DEF), PrevDst);
for (; SizeLeft; SizeLeft--, CurrIdx++) {
NewDst = MRI.createVirtualRegister(TII->getOpRegClass(MI, DstIdx));
// Initialize dword
Register SubReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), SubReg)
.addImm(0);
// Insert into the super-reg
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst)
.addReg(PrevDst)
.addReg(SubReg)
.addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx));
PrevDst = NewDst;
}
// Add as an implicit operand
MI.addOperand(MachineOperand::CreateReg(NewDst, false, true));
// Tie the just added implicit operand to the dst
MI.tieOperands(DstIdx, MI.getNumOperands() - 1);
}
/// Assign the register class depending on the number of
/// bits set in the writemask
void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
if (TII->isVOP3(MI.getOpcode())) {
// Make sure constant bus requirements are respected.
TII->legalizeOperandsVOP3(MRI, MI);
// Prefer VGPRs over AGPRs in mAI instructions where possible.
// This saves a chain-copy of registers and better balances register
// use between VGPRs and AGPRs, as AGPR tuples tend to be big.
if (MI.getDesc().OpInfo) {
unsigned Opc = MI.getOpcode();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
for (auto I : { AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0),
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) }) {
if (I == -1)
break;
MachineOperand &Op = MI.getOperand(I);
if (!Op.isReg() || !Op.getReg().isVirtual())
continue;
auto *RC = TRI->getRegClassForReg(MRI, Op.getReg());
if (!TRI->hasAGPRs(RC))
continue;
auto *Src = MRI.getUniqueVRegDef(Op.getReg());
if (!Src || !Src->isCopy() ||
!TRI->isSGPRReg(MRI, Src->getOperand(1).getReg()))
continue;
auto *NewRC = TRI->getEquivalentVGPRClass(RC);
// All uses of agpr64 and agpr32 can also accept vgpr except for
// v_accvgpr_read, but we do not produce agpr reads during selection,
// so no use checks are needed.
MRI.setRegClass(Op.getReg(), NewRC);
}
// Resolve the rest of AV operands to AGPRs.
if (auto *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2)) {
if (Src2->isReg() && Src2->getReg().isVirtual()) {
auto *RC = TRI->getRegClassForReg(MRI, Src2->getReg());
if (TRI->isVectorSuperClass(RC)) {
auto *NewRC = TRI->getEquivalentAGPRClass(RC);
MRI.setRegClass(Src2->getReg(), NewRC);
if (Src2->isTied())
MRI.setRegClass(MI.getOperand(0).getReg(), NewRC);
}
}
}
}
return;
}
if (TII->isMIMG(MI)) {
if (!MI.mayStore())
AddIMGInit(MI);
TII->enforceOperandRCAlignment(MI, AMDGPU::OpName::vaddr);
}
}
static SDValue buildSMovImm32(SelectionDAG &DAG, const SDLoc &DL,
uint64_t Val) {
SDValue K = DAG.getTargetConstant(Val, DL, MVT::i32);
return SDValue(DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, K), 0);
}
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
const SDLoc &DL,
SDValue Ptr) const {
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
// Build the half of the subregister with the constants before building the
// full 128-bit register. If we are building multiple resource descriptors,
// this will allow CSEing of the 2-component register.
const SDValue Ops0[] = {
DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32),
buildSMovImm32(DAG, DL, 0),
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
};
SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::v2i32, Ops0), 0);
// Combine the constants and the pointer.
const SDValue Ops1[] = {
DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32),
Ptr,
DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32),
SubRegHi,
DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1);
}
/// Return a resource descriptor with the 'Add TID' bit enabled
/// The TID (Thread ID) is multiplied by the stride value (bits [61:48]
/// of the resource descriptor) to create an offset, which is added to
/// the resource pointer.
MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, const SDLoc &DL,
SDValue Ptr, uint32_t RsrcDword1,
uint64_t RsrcDword2And3) const {
SDValue PtrLo = DAG.getTargetExtractSubreg(AMDGPU::sub0, DL, MVT::i32, Ptr);
SDValue PtrHi = DAG.getTargetExtractSubreg(AMDGPU::sub1, DL, MVT::i32, Ptr);
if (RsrcDword1) {
PtrHi = SDValue(DAG.getMachineNode(AMDGPU::S_OR_B32, DL, MVT::i32, PtrHi,
DAG.getConstant(RsrcDword1, DL, MVT::i32)),
0);
}
SDValue DataLo = buildSMovImm32(DAG, DL,
RsrcDword2And3 & UINT64_C(0xFFFFFFFF));
SDValue DataHi = buildSMovImm32(DAG, DL, RsrcDword2And3 >> 32);
const SDValue Ops[] = {
DAG.getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32),
PtrLo,
DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
PtrHi,
DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32),
DataLo,
DAG.getTargetConstant(AMDGPU::sub2, DL, MVT::i32),
DataHi,
DAG.getTargetConstant(AMDGPU::sub3, DL, MVT::i32)
};
return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops);
}
//===----------------------------------------------------------------------===//
// SI Inline Assembly Support
//===----------------------------------------------------------------------===//
std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
StringRef Constraint,
MVT VT) const {
const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>(TRI_);
const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
const unsigned BitWidth = VT.getSizeInBits();
switch (Constraint[0]) {
default:
return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
case 's':
case 'r':
switch (BitWidth) {
case 16:
RC = &AMDGPU::SReg_32RegClass;
break;
case 64:
RC = &AMDGPU::SGPR_64RegClass;
break;
default:
RC = SIRegisterInfo::getSGPRClassForBitWidth(BitWidth);
if (!RC)
return std::make_pair(0U, nullptr);
break;
}
break;
case 'v':
switch (BitWidth) {
case 16:
RC = &AMDGPU::VGPR_32RegClass;
break;
default:
RC = TRI->getVGPRClassForBitWidth(BitWidth);
if (!RC)
return std::make_pair(0U, nullptr);
break;
}
break;
case 'a':
if (!Subtarget->hasMAIInsts())
break;
switch (BitWidth) {
case 16:
RC = &AMDGPU::AGPR_32RegClass;
break;
default:
RC = TRI->getAGPRClassForBitWidth(BitWidth);
if (!RC)
return std::make_pair(0U, nullptr);
break;
}
break;
}
// We actually support i128, i16 and f16 as inline parameters
// even if they are not reported as legal
if (RC && (isTypeLegal(VT) || VT.SimpleTy == MVT::i128 ||
VT.SimpleTy == MVT::i16 || VT.SimpleTy == MVT::f16))
return std::make_pair(0U, RC);
}
if (Constraint.startswith("{") && Constraint.endswith("}")) {
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
if (RegName.consume_front("v")) {
RC = &AMDGPU::VGPR_32RegClass;
} else if (RegName.consume_front("s")) {
RC = &AMDGPU::SGPR_32RegClass;
} else if (RegName.consume_front("a")) {
RC = &AMDGPU::AGPR_32RegClass;
}
if (RC) {
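// Parse either a plain index such as {v5} or a range such as {v[8:11]}.
// Illustrative example: {s[0:3]} yields Width == 128 and selects the 128-bit
// SGPR tuple starting at s0.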
uint32_t Idx;
if (RegName.consume_front("[")) {
uint32_t End;
bool Failed = RegName.consumeInteger(10, Idx);
Failed |= !RegName.consume_front(":");
Failed |= RegName.consumeInteger(10, End);
Failed |= !RegName.consume_back("]");
if (!Failed) {
uint32_t Width = (End - Idx + 1) * 32;
MCRegister Reg = RC->getRegister(Idx);
if (SIRegisterInfo::isVGPRClass(RC))
RC = TRI->getVGPRClassForBitWidth(Width);
else if (SIRegisterInfo::isSGPRClass(RC))
RC = TRI->getSGPRClassForBitWidth(Width);
else if (SIRegisterInfo::isAGPRClass(RC))
RC = TRI->getAGPRClassForBitWidth(Width);
if (RC) {
Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC);
return std::make_pair(Reg, RC);
}
}
} else {
bool Failed = RegName.getAsInteger(10, Idx);
if (!Failed && Idx < RC->getNumRegs())
return std::make_pair(RC->getRegister(Idx), RC);
}
}
}
auto Ret = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
if (Ret.first)
Ret.second = TRI->getPhysRegClass(Ret.first);
return Ret;
}
static bool isImmConstraint(StringRef Constraint) {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'I':
case 'J':
case 'A':
case 'B':
case 'C':
return true;
}
} else if (Constraint == "DA" ||
Constraint == "DB") {
return true;
}
return false;
}
SITargetLowering::ConstraintType
SITargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 's':
case 'v':
case 'a':
return C_RegisterClass;
}
}
if (isImmConstraint(Constraint)) {
return C_Other;
}
return TargetLowering::getConstraintType(Constraint);
}
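// Strip bits above the operand size so a negative immediate is canonical for
// the value's width, unless it is already an inlinable literal. Illustrative
// example: clearUnusedBits(0x100000005, 32) == 5, while clearUnusedBits(-1, 64)
// returns -1 unchanged because -1 is inlinable.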
static uint64_t clearUnusedBits(uint64_t Val, unsigned Size) {
if (!AMDGPU::isInlinableIntLiteral(Val)) {
Val = Val & maskTrailingOnes<uint64_t>(Size);
}
return Val;
}
void SITargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
if (isImmConstraint(Constraint)) {
uint64_t Val;
if (getAsmOperandConstVal(Op, Val) &&
checkAsmConstraintVal(Op, Constraint, Val)) {
Val = clearUnusedBits(Val, Op.getScalarValueSizeInBits());
Ops.push_back(DAG.getTargetConstant(Val, SDLoc(Op), MVT::i64));
}
} else {
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
}
bool SITargetLowering::getAsmOperandConstVal(SDValue Op, uint64_t &Val) const {
unsigned Size = Op.getScalarValueSizeInBits();
if (Size > 64)
return false;
if (Size == 16 && !Subtarget->has16BitInsts())
return false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
Val = C->getSExtValue();
return true;
}
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) {
Val = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
if (BuildVectorSDNode *V = dyn_cast<BuildVectorSDNode>(Op)) {
if (Size != 16 || Op.getNumOperands() != 2)
return false;
if (Op.getOperand(0).isUndef() || Op.getOperand(1).isUndef())
return false;
if (ConstantSDNode *C = V->getConstantSplatNode()) {
Val = C->getSExtValue();
return true;
}
if (ConstantFPSDNode *C = V->getConstantFPSplatNode()) {
Val = C->getValueAPF().bitcastToAPInt().getSExtValue();
return true;
}
}
return false;
}
bool SITargetLowering::checkAsmConstraintVal(SDValue Op,
const std::string &Constraint,
uint64_t Val) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'I':
return AMDGPU::isInlinableIntLiteral(Val);
case 'J':
return isInt<16>(Val);
case 'A':
return checkAsmConstraintValA(Op, Val);
case 'B':
return isInt<32>(Val);
case 'C':
return isUInt<32>(clearUnusedBits(Val, Op.getScalarValueSizeInBits())) ||
AMDGPU::isInlinableIntLiteral(Val);
default:
break;
}
} else if (Constraint.size() == 2) {
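// "DA": each 32-bit half of the 64-bit value must independently satisfy the
// 'A' (inlinable) constraint. Illustrative example: 0x0000000100000002
// passes, since both 1 and 2 are inlinable literals.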
if (Constraint == "DA") {
int64_t HiBits = static_cast<int32_t>(Val >> 32);
int64_t LoBits = static_cast<int32_t>(Val);
return checkAsmConstraintValA(Op, HiBits, 32) &&
checkAsmConstraintValA(Op, LoBits, 32);
}
if (Constraint == "DB") {
return true;
}
}
llvm_unreachable("Invalid asm constraint");
}
bool SITargetLowering::checkAsmConstraintValA(SDValue Op,
uint64_t Val,
unsigned MaxSize) const {
unsigned Size = std::min<unsigned>(Op.getScalarValueSizeInBits(), MaxSize);
bool HasInv2Pi = Subtarget->hasInv2PiInlineImm();
if ((Size == 16 && AMDGPU::isInlinableLiteral16(Val, HasInv2Pi)) ||
(Size == 32 && AMDGPU::isInlinableLiteral32(Val, HasInv2Pi)) ||
(Size == 64 && AMDGPU::isInlinableLiteral64(Val, HasInv2Pi))) {
return true;
}
return false;
}
static int getAlignedAGPRClassID(unsigned UnalignedClassID) {
switch (UnalignedClassID) {
case AMDGPU::VReg_64RegClassID:
return AMDGPU::VReg_64_Align2RegClassID;
case AMDGPU::VReg_96RegClassID:
return AMDGPU::VReg_96_Align2RegClassID;
case AMDGPU::VReg_128RegClassID:
return AMDGPU::VReg_128_Align2RegClassID;
case AMDGPU::VReg_160RegClassID:
return AMDGPU::VReg_160_Align2RegClassID;
case AMDGPU::VReg_192RegClassID:
return AMDGPU::VReg_192_Align2RegClassID;
case AMDGPU::VReg_224RegClassID:
return AMDGPU::VReg_224_Align2RegClassID;
case AMDGPU::VReg_256RegClassID:
return AMDGPU::VReg_256_Align2RegClassID;
case AMDGPU::VReg_512RegClassID:
return AMDGPU::VReg_512_Align2RegClassID;
case AMDGPU::VReg_1024RegClassID:
return AMDGPU::VReg_1024_Align2RegClassID;
case AMDGPU::AReg_64RegClassID:
return AMDGPU::AReg_64_Align2RegClassID;
case AMDGPU::AReg_96RegClassID:
return AMDGPU::AReg_96_Align2RegClassID;
case AMDGPU::AReg_128RegClassID:
return AMDGPU::AReg_128_Align2RegClassID;
case AMDGPU::AReg_160RegClassID:
return AMDGPU::AReg_160_Align2RegClassID;
case AMDGPU::AReg_192RegClassID:
return AMDGPU::AReg_192_Align2RegClassID;
case AMDGPU::AReg_256RegClassID:
return AMDGPU::AReg_256_Align2RegClassID;
case AMDGPU::AReg_512RegClassID:
return AMDGPU::AReg_512_Align2RegClassID;
case AMDGPU::AReg_1024RegClassID:
return AMDGPU::AReg_1024_Align2RegClassID;
default:
return -1;
}
}
// Figure out which registers should be reserved for stack access. Only after
// the function is legalized do we know all of the non-spill stack objects or if
// calls are present.
void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
if (Info->isEntryFunction()) {
// Callable functions have fixed registers used for stack access.
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
}
assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
Info->getStackPtrOffsetReg()));
if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
MRI.replaceRegWith(AMDGPU::SP_REG, Info->getStackPtrOffsetReg());
// We need to worry about replacing the default register with itself in case
// of MIR testcases missing the MFI.
if (Info->getScratchRSrcReg() != AMDGPU::PRIVATE_RSRC_REG)
MRI.replaceRegWith(AMDGPU::PRIVATE_RSRC_REG, Info->getScratchRSrcReg());
if (Info->getFrameOffsetReg() != AMDGPU::FP_REG)
MRI.replaceRegWith(AMDGPU::FP_REG, Info->getFrameOffsetReg());
Info->limitOccupancy(MF);
if (ST.isWave32() && !MF.empty()) {
for (auto &MBB : MF) {
for (auto &MI : MBB) {
TII->fixImplicitOperands(MI);
}
}
}
// FIXME: This is a hack to fixup AGPR classes to use the properly aligned
// classes if required. Ideally the register class constraints would differ
// per-subtarget, but there's no easy way to achieve that right now. This is
// not a problem for VGPRs because the correctly aligned VGPR class is implied
// from using them as the register class for legal types.
if (ST.needsAlignedVGPRs()) {
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
const Register Reg = Register::index2VirtReg(I);
const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
if (!RC)
continue;
int NewClassID = getAlignedAGPRClassID(RC->getID());
if (NewClassID != -1)
MRI.setRegClass(Reg, TRI->getRegClass(NewClassID));
}
}
TargetLoweringBase::finalizeLowering(MF);
}
void SITargetLowering::computeKnownBitsForFrameIndex(
const int FI, KnownBits &Known, const MachineFunction &MF) const {
TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);
// Set the high bits to zero based on the maximum allowed scratch size per
// wave. We can't use vaddr in MUBUF instructions if we don't know the address
// calculation won't overflow, so assume the sign bit is never set.
Known.Zero.setHighBits(getSubtarget()->getKnownHighZeroBitsForFrameIndex());
}
static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB,
KnownBits &Known, unsigned Dim) {
unsigned MaxValue =
ST.getMaxWorkitemID(KB.getMachineFunction().getFunction(), Dim);
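// E.g. a maximum workitem ID of 1023 leaves the top 22 bits of a 32-bit
// value known zero.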
Known.Zero.setHighBits(countLeadingZeros(MaxValue));
}
void SITargetLowering::computeKnownBitsForTargetInstr(
GISelKnownBits &KB, Register R, KnownBits &Known, const APInt &DemandedElts,
const MachineRegisterInfo &MRI, unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
case AMDGPU::G_INTRINSIC: {
switch (MI->getIntrinsicID()) {
case Intrinsic::amdgcn_workitem_id_x:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
break;
case Intrinsic::amdgcn_workitem_id_y:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 1);
break;
case Intrinsic::amdgcn_workitem_id_z:
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 2);
break;
case Intrinsic::amdgcn_mbcnt_lo:
case Intrinsic::amdgcn_mbcnt_hi: {
// These return at most the wavefront size - 1.
unsigned Size = MRI.getType(R).getSizeInBits();
Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2());
break;
}
case Intrinsic::amdgcn_groupstaticsize: {
// We can report all bits above the maximum LDS size as zero. We can't
// report based on the actual size because we don't know whether it is
// accurate at any given point.
Known.Zero.setHighBits(countLeadingZeros(getSubtarget()->getLocalMemorySize()));
break;
}
}
break;
}
case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
Known.Zero.setHighBits(24);
break;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
Known.Zero.setHighBits(16);
break;
}
}
Align SITargetLowering::computeKnownAlignForTargetInstr(
GISelKnownBits &KB, Register R, const MachineRegisterInfo &MRI,
unsigned Depth) const {
const MachineInstr *MI = MRI.getVRegDef(R);
switch (MI->getOpcode()) {
case AMDGPU::G_INTRINSIC:
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
// FIXME: Can this move to generic code? What about the case where the call
// site specifies a lower alignment?
Intrinsic::ID IID = MI->getIntrinsicID();
LLVMContext &Ctx = KB.getMachineFunction().getFunction().getContext();
AttributeList Attrs = Intrinsic::getAttributes(Ctx, IID);
if (MaybeAlign RetAlign = Attrs.getRetAlignment())
return *RetAlign;
return Align(1);
}
default:
return Align(1);
}
}
Align SITargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
const Align PrefAlign = TargetLowering::getPrefLoopAlignment(ML);
const Align CacheLineAlign = Align(64);
// Pre-GFX10 target did not benefit from loop alignment
if (!ML || DisableLoopAlignment ||
(getSubtarget()->getGeneration() < AMDGPUSubtarget::GFX10) ||
getSubtarget()->hasInstFwdPrefetchBug())
return PrefAlign;
// On GFX10 the I$ consists of 4 x 64-byte cache lines.
// By default the prefetcher keeps one cache line behind and reads two ahead.
// We can modify it with S_INST_PREFETCH for larger loops to have two lines
// behind and one ahead.
// Therefore we can benefit from aligning loop headers if the loop fits in
// 192 bytes. If the loop fits in 64 bytes it always spans no more than two
// cache lines and does not need alignment. If the loop is at most 128 bytes
// we do not need to modify the prefetch; if it is at most 192 bytes we need
// two lines behind.
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
const MachineBasicBlock *Header = ML->getHeader();
if (Header->getAlignment() != PrefAlign)
return Header->getAlignment(); // Already processed.
unsigned LoopSize = 0;
for (const MachineBasicBlock *MBB : ML->blocks()) {
// If an inner loop block is aligned, assume on average half of the
// alignment size is added as nops.
if (MBB != Header)
LoopSize += MBB->getAlignment().value() / 2;
for (const MachineInstr &MI : *MBB) {
LoopSize += TII->getInstSizeInBytes(MI);
if (LoopSize > 192)
return PrefAlign;
}
}
if (LoopSize <= 64)
return PrefAlign;
if (LoopSize <= 128)
return CacheLineAlign;
// If any of the parent loops is surrounded by prefetch instructions, do not
// insert a new one for the inner loop; that would reset the parent's settings.
for (MachineLoop *P = ML->getParentLoop(); P; P = P->getParentLoop()) {
if (MachineBasicBlock *Exit = P->getExitBlock()) {
auto I = Exit->getFirstNonDebugInstr();
if (I != Exit->end() && I->getOpcode() == AMDGPU::S_INST_PREFETCH)
return CacheLineAlign;
}
}
MachineBasicBlock *Pre = ML->getLoopPreheader();
MachineBasicBlock *Exit = ML->getExitBlock();
if (Pre && Exit) {
auto PreTerm = Pre->getFirstTerminator();
if (PreTerm == Pre->begin() ||
std::prev(PreTerm)->getOpcode() != AMDGPU::S_INST_PREFETCH)
BuildMI(*Pre, PreTerm, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
.addImm(1); // prefetch 2 lines behind PC
auto ExitHead = Exit->getFirstNonDebugInstr();
if (ExitHead == Exit->end() ||
ExitHead->getOpcode() != AMDGPU::S_INST_PREFETCH)
BuildMI(*Exit, ExitHead, DebugLoc(), TII->get(AMDGPU::S_INST_PREFETCH))
.addImm(2); // prefetch 1 line behind PC
}
return CacheLineAlign;
}
LLVM_ATTRIBUTE_UNUSED
static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
assert(N->getOpcode() == ISD::CopyFromReg);
do {
// Follow the chain until we find an INLINEASM node.
N = N->getOperand(0).getNode();
if (N->getOpcode() == ISD::INLINEASM ||
N->getOpcode() == ISD::INLINEASM_BR)
return true;
} while (N->getOpcode() == ISD::CopyFromReg);
return false;
}
bool SITargetLowering::isSDNodeSourceOfDivergence(
const SDNode *N, FunctionLoweringInfo *FLI,
LegacyDivergenceAnalysis *KDA) const {
switch (N->getOpcode()) {
case ISD::CopyFromReg: {
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
const MachineRegisterInfo &MRI = FLI->MF->getRegInfo();
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
Register Reg = R->getReg();
// FIXME: Why does this need to consider isLiveIn?
if (Reg.isPhysical() || MRI.isLiveIn(Reg))
return !TRI->isSGPRReg(MRI, Reg);
if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
return KDA->isDivergent(V);
assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
return !TRI->isSGPRReg(MRI, Reg);
}
case ISD::LOAD: {
const LoadSDNode *L = cast<LoadSDNode>(N);
unsigned AS = L->getAddressSpace();
// A flat load may access private memory.
return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
}
case ISD::CALLSEQ_END:
return true;
case ISD::INTRINSIC_WO_CHAIN:
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
case ISD::INTRINSIC_W_CHAIN:
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
case AMDGPUISD::ATOMIC_CMP_SWAP:
case AMDGPUISD::ATOMIC_INC:
case AMDGPUISD::ATOMIC_DEC:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
case AMDGPUISD::BUFFER_ATOMIC_SWAP:
case AMDGPUISD::BUFFER_ATOMIC_ADD:
case AMDGPUISD::BUFFER_ATOMIC_SUB:
case AMDGPUISD::BUFFER_ATOMIC_SMIN:
case AMDGPUISD::BUFFER_ATOMIC_UMIN:
case AMDGPUISD::BUFFER_ATOMIC_SMAX:
case AMDGPUISD::BUFFER_ATOMIC_UMAX:
case AMDGPUISD::BUFFER_ATOMIC_AND:
case AMDGPUISD::BUFFER_ATOMIC_OR:
case AMDGPUISD::BUFFER_ATOMIC_XOR:
case AMDGPUISD::BUFFER_ATOMIC_INC:
case AMDGPUISD::BUFFER_ATOMIC_DEC:
case AMDGPUISD::BUFFER_ATOMIC_CMPSWAP:
case AMDGPUISD::BUFFER_ATOMIC_CSUB:
case AMDGPUISD::BUFFER_ATOMIC_FADD:
case AMDGPUISD::BUFFER_ATOMIC_FMIN:
case AMDGPUISD::BUFFER_ATOMIC_FMAX:
// Target-specific read-modify-write atomics are sources of divergence.
return true;
default:
if (auto *A = dyn_cast<AtomicSDNode>(N)) {
// Generic read-modify-write atomics are sources of divergence.
return A->readMem() && A->writeMem();
}
return false;
}
}
bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
EVT VT) const {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
case MVT::f32:
return hasFP32Denormals(DAG.getMachineFunction());
case MVT::f64:
case MVT::f16:
return hasFP64FP16Denormals(DAG.getMachineFunction());
default:
return false;
}
}
bool SITargetLowering::denormalsEnabledForType(LLT Ty,
MachineFunction &MF) const {
switch (Ty.getScalarSizeInBits()) {
case 32:
return hasFP32Denormals(MF);
case 64:
case 16:
return hasFP64FP16Denormals(MF);
default:
return false;
}
}
bool SITargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
unsigned Depth) const {
if (Op.getOpcode() == AMDGPUISD::CLAMP) {
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
if (Info->getMode().DX10Clamp)
return true; // Clamped to 0.
return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
}
return AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(Op, DAG,
SNaN, Depth);
}
// Global FP atomic instructions have a hardcoded FP mode and do not support
// FP32 denormals; only v2f16 denormals are supported.
static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
const fltSemantics &Flt = RMW->getType()->getScalarType()->getFltSemantics();
auto DenormMode = RMW->getParent()->getParent()->getDenormalMode(Flt);
if (&Flt == &APFloat::IEEEsingle())
return DenormMode == DenormalMode::getPreserveSign();
return DenormMode == DenormalMode::getIEEE();
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
unsigned AS = RMW->getPointerAddressSpace();
if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return AtomicExpansionKind::NotAtomic;
auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
OptimizationRemarkEmitter ORE(RMW->getFunction());
LLVMContext &Ctx = RMW->getFunction()->getContext();
SmallVector<StringRef> SSNs;
Ctx.getSyncScopeNames(SSNs);
auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
? "system"
: SSNs[RMW->getSyncScopeID()];
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
<< "Hardware instruction generated for atomic "
<< RMW->getOperationName(RMW->getOperation())
<< " operation at memory scope " << MemScope
<< " due to an unsafe request.";
});
return Kind;
};
switch (RMW->getOperation()) {
case AtomicRMWInst::FAdd: {
Type *Ty = RMW->getType();
// We don't have a way to support 16-bit atomics now, so just leave them
// as-is.
if (Ty->isHalfTy())
return AtomicExpansionKind::None;
if (!Ty->isFloatTy() && (!Subtarget->hasGFX90AInsts() || !Ty->isDoubleTy()))
return AtomicExpansionKind::CmpXChg;
if ((AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) &&
Subtarget->hasAtomicFaddNoRtnInsts()) {
if (Subtarget->hasGFX940Insts())
return AtomicExpansionKind::None;
// The amdgpu-unsafe-fp-atomics attribute enables generation of unsafe
// floating point atomic instructions. May generate more efficient code,
// but may not respect rounding and denormal modes, and may give incorrect
// results for certain memory destinations.
if (RMW->getFunction()
->getFnAttribute("amdgpu-unsafe-fp-atomics")
.getValueAsString() != "true")
return AtomicExpansionKind::CmpXChg;
if (Subtarget->hasGFX90AInsts()) {
if (Ty->isFloatTy() && AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
auto SSID = RMW->getSyncScopeID();
if (SSID == SyncScope::System ||
SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
return AtomicExpansionKind::CmpXChg;
return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
if (AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
// DS FP atomics do respect the denormal mode, but the rounding mode is
// fixed to round-to-nearest-even.
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) {
if (!Ty->isDoubleTy())
return AtomicExpansionKind::None;
if (fpModeMatchesGlobalFPAtomicMode(RMW))
return AtomicExpansionKind::None;
return RMW->getFunction()
->getFnAttribute("amdgpu-unsafe-fp-atomics")
.getValueAsString() == "true"
? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
return AtomicExpansionKind::CmpXChg;
}
default:
break;
}
return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
return LI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
return SI->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
: AtomicExpansionKind::None;
}
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CmpX) const {
return CmpX->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS
? AtomicExpansionKind::NotAtomic
: AtomicExpansionKind::None;
}
const TargetRegisterClass *
SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
const TargetRegisterClass *RC = TargetLoweringBase::getRegClassFor(VT, false);
const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (RC == &AMDGPU::VReg_1RegClass && !isDivergent)
return Subtarget->getWavefrontSize() == 64 ? &AMDGPU::SReg_64RegClass
: &AMDGPU::SReg_32RegClass;
if (!TRI->isSGPRClass(RC) && !isDivergent)
return TRI->getEquivalentSGPRClass(RC);
else if (TRI->isSGPRClass(RC) && isDivergent)
return TRI->getEquivalentVGPRClass(RC);
return RC;
}
// FIXME: This is a workaround for DivergenceAnalysis not understanding always
// uniform values (as produced by the mask results of control flow intrinsics)
// used outside of divergent blocks. The phi users need to also be treated as
// always uniform.
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
unsigned WaveSize) {
// FIXME: We assume we never cast the mask results of a control flow
// intrinsic.
// Early exit if the type won't be consistent as a compile time hack.
IntegerType *IT = dyn_cast<IntegerType>(V->getType());
if (!IT || IT->getBitWidth() != WaveSize)
return false;
if (!isa<Instruction>(V))
return false;
if (!Visited.insert(V).second)
return false;
bool Result = false;
for (auto U : V->users()) {
if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
if (V == U->getOperand(1)) {
switch (Intrinsic->getIntrinsicID()) {
default:
Result = false;
break;
case Intrinsic::amdgcn_if_break:
case Intrinsic::amdgcn_if:
case Intrinsic::amdgcn_else:
Result = true;
break;
}
}
if (V == U->getOperand(0)) {
switch (Intrinsic->getIntrinsicID()) {
default:
Result = false;
break;
case Intrinsic::amdgcn_end_cf:
case Intrinsic::amdgcn_loop:
Result = true;
break;
}
}
} else {
Result = hasCFUser(U, Visited, WaveSize);
}
if (Result)
break;
}
return Result;
}
bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
const Value *V) const {
if (const CallInst *CI = dyn_cast<CallInst>(V)) {
if (CI->isInlineAsm()) {
// FIXME: This cannot give a correct answer. This should only trigger in
// the case where inline asm returns mixed SGPR and VGPR results, used
// outside the defining block. We don't have a specific result to
// consider, so this assumes if any value is SGPR, the overall register
// also needs to be SGPR.
const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
MF.getDataLayout(), Subtarget->getRegisterInfo(), *CI);
for (auto &TC : TargetConstraints) {
if (TC.Type == InlineAsm::isOutput) {
ComputeConstraintToUse(TC, SDValue());
const TargetRegisterClass *RC = getRegForInlineAsmConstraint(
SIRI, TC.ConstraintCode, TC.ConstraintVT).second;
if (RC && SIRI->isSGPRClass(RC))
return true;
}
}
}
}
SmallPtrSet<const Value *, 16> Visited;
return hasCFUser(V, Visited, Subtarget->getWavefrontSize());
}
std::pair<InstructionCost, MVT>
SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
Type *Ty) const {
std::pair<InstructionCost, MVT> Cost =
TargetLoweringBase::getTypeLegalizationCost(DL, Ty);
auto Size = DL.getTypeSizeInBits(Ty);
// Maximum load or store can handle 8 dwords for scalar and 4 for
// vector ALU. Let's assume anything above 8 dwords is expensive
// even if legal.
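// For example, a 1024-bit type adds (1024 + 255) / 256 == 4 to the base
// legalization cost.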
if (Size <= 256)
return Cost;
Cost.first += (Size + 255) / 256;
return Cost;
}
bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
SDNode::use_iterator I = N->use_begin(), E = N->use_end();
for (; I != E; ++I) {
if (MemSDNode *M = dyn_cast<MemSDNode>(*I)) {
if (getBasePtrIndex(M) == I.getOperandNo())
return true;
}
}
return false;
}
bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const {
if (!N0.hasOneUse())
return false;
// Profitable when N0 is already divergent or N1 is uniform, since
// reassociating then cannot make a uniform N0 divergent.
if (N0->isDivergent() || !N1->isDivergent())
return true;
// Check if we have a good chance to form the memory access pattern with the
// base and offset
return (DAG.isBaseWithConstantOffset(N0) &&
hasMemSDNodeUser(*N0->use_begin()));
}
MachineMemOperand::Flags
SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
if (I.getMetadata("amdgpu.noclobber"))
return MONoClobber;
return MachineMemOperand::MONone;
}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 3c102463ba08..cbfd2bc68f18 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1,2385 +1,2385 @@
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
using namespace llvm;
#define DEBUG_TYPE "armtti"
static cl::opt<bool> EnableMaskedLoadStores(
"enable-arm-maskedldst", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of masked loads and stores"));
static cl::opt<bool> DisableLowOverheadLoops(
"disable-arm-loloops", cl::Hidden, cl::init(false),
cl::desc("Disable the generation of low-overhead loops"));
static cl::opt<bool>
AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true),
cl::desc("Enable the generation of WLS loops"));
extern cl::opt<TailPredication::Mode> EnableTailPredication;
extern cl::opt<bool> EnableMaskedGatherScatters;
extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor;
/// Convert a vector load intrinsic into a simple llvm load instruction.
/// This is beneficial when the underlying object being addressed comes
/// from a constant, since we get constant-folding for free.
static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign,
InstCombiner::BuilderTy &Builder) {
auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
if (!IntrAlign)
return nullptr;
unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign
? MemAlign
: IntrAlign->getLimitedValue();
if (!isPowerOf2_32(Alignment))
return nullptr;
auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
PointerType::get(II.getType(), 0));
return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment));
}
bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
const Function *Callee) const {
const TargetMachine &TM = getTLI()->getTargetMachine();
const FeatureBitset &CallerBits =
TM.getSubtargetImpl(*Caller)->getFeatureBits();
const FeatureBitset &CalleeBits =
TM.getSubtargetImpl(*Callee)->getFeatureBits();
// To inline a callee, all features not in the allowed list must match exactly.
bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) ==
(CalleeBits & ~InlineFeaturesAllowed);
// For features in the allowed list, the callee's features must be a subset of
// the caller's.
bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) ==
(CalleeBits & InlineFeaturesAllowed);
return MatchExact && MatchSubset;
}
TTI::AddressingModeKind
ARMTTIImpl::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
if (ST->hasMVEIntegerOps())
return TTI::AMK_PostIndexed;
if (L->getHeader()->getParent()->hasOptSize())
return TTI::AMK_None;
if (ST->isMClass() && ST->isThumb2() &&
L->getNumBlocks() == 1)
return TTI::AMK_PreIndexed;
return TTI::AMK_None;
}
Optional<Instruction *>
ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
using namespace PatternMatch;
Intrinsic::ID IID = II.getIntrinsicID();
switch (IID) {
default:
break;
case Intrinsic::arm_neon_vld1: {
Align MemAlign =
getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree());
if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) {
return IC.replaceInstUsesWith(II, V);
}
break;
}
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst2:
case Intrinsic::arm_neon_vst3:
case Intrinsic::arm_neon_vst4:
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
Align MemAlign =
getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree());
unsigned AlignArg = II.arg_size() - 1;
Value *AlignArgOp = II.getArgOperand(AlignArg);
MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
if (Align && *Align < MemAlign) {
return IC.replaceOperand(
II, AlignArg,
ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(),
false));
}
break;
}
case Intrinsic::arm_mve_pred_i2v: {
Value *Arg = II.getArgOperand(0);
Value *ArgArg;
if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
PatternMatch::m_Value(ArgArg))) &&
II.getType() == ArgArg->getType()) {
return IC.replaceInstUsesWith(II, ArgArg);
}
Constant *XorMask;
if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>(
PatternMatch::m_Value(ArgArg)),
PatternMatch::m_Constant(XorMask))) &&
II.getType() == ArgArg->getType()) {
if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
if (CI->getValue().trunc(16).isAllOnes()) {
auto TrueVector = IC.Builder.CreateVectorSplat(
cast<FixedVectorType>(II.getType())->getNumElements(),
IC.Builder.getTrue());
return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector);
}
}
}
KnownBits ScalarKnown(32);
if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16),
ScalarKnown, 0)) {
return &II;
}
break;
}
case Intrinsic::arm_mve_pred_v2i: {
Value *Arg = II.getArgOperand(0);
Value *ArgArg;
if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>(
PatternMatch::m_Value(ArgArg)))) {
return IC.replaceInstUsesWith(II, ArgArg);
}
if (!II.getMetadata(LLVMContext::MD_range)) {
Type *IntTy32 = Type::getInt32Ty(II.getContext());
Metadata *M[] = {
ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)),
ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0x10000))};
II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M));
return &II;
}
break;
}
case Intrinsic::arm_mve_vadc:
case Intrinsic::arm_mve_vadc_predicated: {
unsigned CarryOp =
(II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2;
assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 &&
"Bad type for intrinsic!");
KnownBits CarryKnown(32);
if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29),
CarryKnown)) {
return &II;
}
break;
}
case Intrinsic::arm_mve_vmldava: {
Instruction *I = cast<Instruction>(&II);
if (I->hasOneUse()) {
auto *User = cast<Instruction>(*I->user_begin());
Value *OpZ;
if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
match(I->getOperand(3), m_Zero())) {
Value *OpX = I->getOperand(4);
Value *OpY = I->getOperand(5);
Type *OpTy = OpX->getType();
IC.Builder.SetInsertPoint(User);
Value *V =
IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
{I->getOperand(0), I->getOperand(1),
I->getOperand(2), OpZ, OpX, OpY});
IC.replaceInstUsesWith(*User, V);
return IC.eraseInstFromFunction(*User);
}
}
return None;
}
}
return None;
}
Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) const {
// Compute the demanded bits for a narrowing MVE intrinsic. TopOpc is the
// index of the operand that selects between the top/bottom forms of the
// instruction, which varies between intrinsics.
auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
// Only the odd or even lanes of operand 0 are demanded, depending on
// whether this is a top or bottom instruction.
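// E.g. for NumElts == 8 and IsTop == 1 the splat below yields
// DemandedElts == 0b01010101, i.e. only the even lanes of operand 0 are
// demanded.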
APInt DemandedElts =
APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
// The other lanes will be defined from the inserted elements.
UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
: APInt::getHighBitsSet(2, 1));
return None;
};
switch (II.getIntrinsicID()) {
default:
break;
case Intrinsic::arm_mve_vcvt_narrow:
SimplifyNarrowInstrTopBottom(2);
break;
case Intrinsic::arm_mve_vqmovn:
SimplifyNarrowInstrTopBottom(4);
break;
case Intrinsic::arm_mve_vshrn:
SimplifyNarrowInstrTopBottom(7);
break;
}
return None;
}
InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
unsigned Bits = Ty->getPrimitiveSizeInBits();
if (Bits == 0 || Imm.getActiveBits() >= 64)
return 4;
int64_t SImmVal = Imm.getSExtValue();
uint64_t ZImmVal = Imm.getZExtValue();
if (!ST->isThumb()) {
if ((SImmVal >= 0 && SImmVal < 65536) ||
(ARM_AM::getSOImmVal(ZImmVal) != -1) ||
(ARM_AM::getSOImmVal(~ZImmVal) != -1))
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
if (ST->isThumb2()) {
if ((SImmVal >= 0 && SImmVal < 65536) ||
(ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
(ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
return 1;
return ST->hasV6T2Ops() ? 2 : 3;
}
// Thumb1, any i8 imm cost 1.
if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
return 1;
if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
// Load from constantpool.
return 3;
}
// Constants smaller than 256 fit in the immediate field of Thumb1
// instructions, so we return a cost of zero, and 1 otherwise.
InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty) {
if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
return 0;
return 1;
}
// Checks whether Inst is part of a min(max()) or max(min()) pattern
// that will match to an SSAT instruction. Returns the instruction being
// saturated, or null if no saturation pattern was found.
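// Illustrative example: smax(smin(x, 127), -128) saturates x to 8 bits and
// matches with Imm == -128; the smin constant must equal (-Imm) - 1 == 127.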
static Value *isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
Value *LHS, *RHS;
ConstantInt *C;
SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor;
if (InstSPF == SPF_SMAX &&
PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
auto isSSatMin = [&](Value *MinInst) {
if (isa<SelectInst>(MinInst)) {
Value *MinLHS, *MinRHS;
ConstantInt *MinC;
SelectPatternFlavor MinSPF =
matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor;
if (MinSPF == SPF_SMIN &&
PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) &&
MinC->getValue() == ((-Imm) - 1))
return true;
}
return false;
};
if (isSSatMin(Inst->getOperand(1)))
return cast<Instruction>(Inst->getOperand(1))->getOperand(1);
if (Inst->hasNUses(2) &&
(isSSatMin(*Inst->user_begin()) || isSSatMin(*(++Inst->user_begin()))))
return Inst->getOperand(1);
}
return nullptr;
}
// Look for an FP saturation pattern, where the instruction can be simplified
// to a fptosi.sat: max(min(fptosi)). The constant in this case is always free.
static bool isFPSatMinMaxPattern(Instruction *Inst, const APInt &Imm) {
if (Imm.getBitWidth() != 64 ||
Imm != APInt::getHighBitsSet(64, 33)) // -2147483648
return false;
Value *FP = isSSATMinMaxPattern(Inst, Imm);
if (!FP && isa<ICmpInst>(Inst) && Inst->hasOneUse())
FP = isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm);
if (!FP)
return false;
return isa<FPToSIInst>(FP);
}
InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind,
Instruction *Inst) {
// Division by a constant can be turned into multiplication, but only if we
// know it's constant. So it's not so much that the immediate is cheap (it's
// not), but that the alternative is worse.
// FIXME: this is probably unneeded with GlobalISel.
if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
Idx == 1)
return 0;
// Leave any gep offsets for the CodeGenPrepare, which will do a better job at
// splitting any large offsets.
if (Opcode == Instruction::GetElementPtr && Idx != 0)
return 0;
if (Opcode == Instruction::And) {
// UXTB/UXTH
if (Imm == 255 || Imm == 65535)
return 0;
// Conversion to BIC is free, and means we can use ~Imm instead.
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(~Imm, Ty, CostKind));
}
if (Opcode == Instruction::Add)
// Conversion to SUB is free, and means we can use -Imm instead.
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(-Imm, Ty, CostKind));
if (Opcode == Instruction::ICmp && Imm.isNegative() &&
Ty->getIntegerBitWidth() == 32) {
int64_t NegImm = -Imm.getSExtValue();
if (ST->isThumb2() && NegImm < 1<<12)
// icmp X, #-C -> cmn X, #C
return 0;
if (ST->isThumb() && NegImm < 1<<8)
// icmp X, #-C -> adds X, #C
return 0;
}
// xor a, -1 can always be folded to MVN
if (Opcode == Instruction::Xor && Imm.isAllOnes())
return 0;
// Ensure that the negative constants of min(max()) or max(min()) patterns that
// match SSAT instructions don't get hoisted
if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) &&
Ty->getIntegerBitWidth() <= 32) {
if (isSSATMinMaxPattern(Inst, Imm) ||
(isa<ICmpInst>(Inst) && Inst->hasOneUse() &&
isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm)))
return 0;
}
if (Inst && ST->hasVFP2Base() && isFPSatMinMaxPattern(Inst, Imm))
return 0;
// We can convert <= -1 to < 0, which is generally quite cheap.
if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE)
return std::min(getIntImmCost(Imm, Ty, CostKind),
getIntImmCost(Imm + 1, Ty, CostKind));
}
return getIntImmCost(Imm, Ty, CostKind);
}
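// Worked examples for the rules above (illustrative): on Thumb2,
// `icmp slt i32 %x, -10` treats -10 as free since it can become
// `cmn r0, #10`; `and i32 %x, 255` is free as a UXTB; and
// `xor i32 %x, -1` is free as an MVN.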
InstructionCost ARMTTIImpl::getCFInstrCost(unsigned Opcode,
TTI::TargetCostKind CostKind,
const Instruction *I) {
if (CostKind == TTI::TCK_RecipThroughput &&
(ST->hasNEON() || ST->hasMVEIntegerOps())) {
// FIXME: The vectorizer is highly sensitive to the cost of these
// instructions, which suggests that it may be using the costs incorrectly.
// But, for now, just make them free to avoid performance regressions for
// vector targets.
return 0;
}
return BaseT::getCFInstrCost(Opcode, CostKind, I);
}
InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src,
TTI::CastContextHint CCH,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
// TODO: Allow non-throughput costs that aren't binary.
auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost {
if (CostKind != TTI::TCK_RecipThroughput)
return Cost == 0 ? 0 : 1;
return Cost;
};
auto IsLegalFPType = [this](EVT VT) {
EVT EltVT = VT.getScalarType();
return (EltVT == MVT::f32 && ST->hasVFP2Base()) ||
(EltVT == MVT::f64 && ST->hasFP64()) ||
(EltVT == MVT::f16 && ST->hasFullFP16());
};
EVT SrcTy = TLI->getValueType(DL, Src);
EVT DstTy = TLI->getValueType(DL, Dst);
if (!SrcTy.isSimple() || !DstTy.isSimple())
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
// Extending masked loads / truncating masked stores are expensive because we
// currently don't split them. This means that we'll likely end up
// loading/storing each element individually (hence the high cost).
if ((ST->hasMVEIntegerOps() &&
(Opcode == Instruction::Trunc || Opcode == Instruction::ZExt ||
Opcode == Instruction::SExt)) ||
(ST->hasMVEFloatOps() &&
(Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) &&
IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)))
if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128)
return 2 * DstTy.getVectorNumElements() *
ST->getMVEVectorCostFactor(CostKind);
// The extend of other kinds of load is free
if (CCH == TTI::CastContextHint::Normal ||
CCH == TTI::CastContextHint::Masked) {
static const TypeConversionCostTblEntry LoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0},
{ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0},
{ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0},
{ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0},
{ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1},
{ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1},
{ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1},
{ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1},
{ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1},
};
if (const auto *Entry = ConvertCostTableLookup(
LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
static const TypeConversionCostTblEntry MVELoadConversionTbl[] = {
{ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0},
{ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0},
{ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0},
{ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0},
{ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0},
{ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0},
// The following extend from a legal type to an illegal type, so we need to
// split the load. This introduces an extra load operation, but the
// extend is still "free".
{ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1},
{ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1},
{ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 3},
{ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 3},
{ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1},
{ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVELoadConversionTbl, ISD,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = {
// FPExtends are similar but also require the VCVT instructions.
{ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3},
};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVEFLoadConversionTbl, ISD,
DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
// The truncate of a store is free. This is the mirror of extends above.
static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = {
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0},
{ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0},
{ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1},
{ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1},
{ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3},
{ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1},
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVEStoreConversionTbl, ISD,
SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = {
{ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1},
{ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3},
};
if (SrcTy.isVector() && ST->hasMVEFloatOps()) {
if (const auto *Entry =
ConvertCostTableLookup(MVEFStoreConversionTbl, ISD,
SrcTy.getSimpleVT(), DstTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
}
// NEON vector operations that can extend their inputs.
if ((ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND) &&
I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) {
static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = {
// vaddl
{ ISD::ADD, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::ADD, MVT::v8i16, MVT::v8i8, 0 },
// vsubl
{ ISD::SUB, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SUB, MVT::v8i16, MVT::v8i8, 0 },
// vmull
{ ISD::MUL, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::MUL, MVT::v8i16, MVT::v8i8, 0 },
// vshll
{ ISD::SHL, MVT::v4i32, MVT::v4i16, 0 },
{ ISD::SHL, MVT::v8i16, MVT::v8i8, 0 },
};
auto *User = cast<Instruction>(*I->user_begin());
int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode());
if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT())) {
return AdjustCost(Entry->Cost);
}
}
// Single to/from double precision conversions.
if (Src->isVectorTy() && ST->hasNEON() &&
((ISD == ISD::FP_ROUND && SrcTy.getScalarType() == MVT::f64 &&
DstTy.getScalarType() == MVT::f32) ||
(ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 &&
DstTy.getScalarType() == MVT::f64))) {
static const CostTblEntry NEONFltDblTbl[] = {
// Vector fptrunc/fpext conversions.
{ISD::FP_ROUND, MVT::v2f64, 2},
{ISD::FP_EXTEND, MVT::v2f32, 2},
{ISD::FP_EXTEND, MVT::v4f32, 4}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
return AdjustCost(LT.first * Entry->Cost);
}
// Some arithmetic, load and store operations have specific instructions
// to cast up/down their types automatically at no extra cost.
// TODO: Get these tables to know at least what the related operations are.
static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
{ ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
// The number of vmovl instructions for the extension.
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
// Operations that we legalize using splitting.
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
// Vector float <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 },
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 },
{ ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
{ ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 },
{ ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
{ ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 },
{ ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 },
{ ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
{ ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
// Vector double <-> i32 conversions.
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 },
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
{ ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
{ ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 },
{ ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 },
{ ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 },
{ ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 }
};
if (SrcTy.isVector() && ST->hasNEON()) {
if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
// Scalar float to integer conversions.
static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
{ ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 },
{ ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 },
{ ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 },
{ ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 },
{ ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 }
};
if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
// Scalar integer to float conversions.
static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
{ ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 },
{ ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 },
{ ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 },
{ ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 },
{ ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 }
};
if (SrcTy.isInteger() && ST->hasNEON()) {
if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
// MVE extend costs, taken from codegen tests. i8->i16 or i16->i32 is one
// instruction, i8->i32 is two. i64 zexts are a VAND with a constant, sexts
// are linearised so take more.
static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = {
{ ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 },
{ ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 },
{ ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 },
{ ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 },
};
if (SrcTy.isVector() && ST->hasMVEIntegerOps()) {
if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl,
ISD, DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind);
}
if (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND) {
// As a general rule, fp converts that were not matched above are scalarized
// and cost 1 vcvt for each lane, so long as the instruction is available.
// If not, it will become a series of function calls.
const InstructionCost CallCost =
getCallInstrCost(nullptr, Dst, {Src}, CostKind);
int Lanes = 1;
if (SrcTy.isFixedLengthVector())
Lanes = SrcTy.getVectorNumElements();
if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))
return Lanes;
else
return Lanes * CallCost;
}
if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() &&
SrcTy.isFixedLengthVector()) {
// Treat a truncate with a larger-than-legal source (128 bits for MVE) as
// expensive: 2 instructions per lane.
if ((SrcTy.getScalarType() == MVT::i8 ||
SrcTy.getScalarType() == MVT::i16 ||
SrcTy.getScalarType() == MVT::i32) &&
SrcTy.getSizeInBits() > 128 &&
SrcTy.getSizeInBits() > DstTy.getSizeInBits())
return SrcTy.getVectorNumElements() * 2;
}
// Scalar integer conversion costs.
static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
// i16 -> i64 requires two dependent operations.
{ ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
// Truncates on i64 are assumed to be free.
{ ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i8, MVT::i64, 0 },
{ ISD::TRUNCATE, MVT::i1, MVT::i64, 0 }
};
if (SrcTy.isInteger()) {
if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
}
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
? ST->getMVEVectorCostFactor(CostKind)
: 1;
return AdjustCost(
BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
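// Example queries against the tables above (illustrative): under MVE,
// sext <8 x i8> -> <8 x i16> is a single extend, costing 1 times the MVE
// vector cost factor, while sext <2 x i32> -> <2 x i64> has no single
// instruction and is costed at 8 (times the factor) per the
// MVEVectorConversionTbl.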
InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
unsigned Index) {
// Penalize inserting into a D-subregister. We end up with a three times
// lower estimated throughput on Swift.
if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
return 3;
if (ST->hasNEON() && (Opcode == Instruction::InsertElement ||
Opcode == Instruction::ExtractElement)) {
// Cross-class copies are expensive on many microarchitectures,
// so assume they are expensive by default.
if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy())
return 3;
// Even if it's not a cross class copy, this likely leads to mixing
// of NEON and VFP code and should be therefore penalized.
if (ValTy->isVectorTy() &&
ValTy->getScalarSizeInBits() <= 32)
return std::max<InstructionCost>(
BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
}
if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement ||
Opcode == Instruction::ExtractElement)) {
// Integer cross-lane moves are more expensive than float, which can
// sometimes just be vmovs. Integer moves involve a pass through the GPR
// registers, causing more of a delay.
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType());
return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1);
}
return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
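// Example (illustrative): under MVE, `extractelement <4 x i32> %v, i32 1`
// moves a lane out through a GPR and is costed at 4x the scalar
// legalization cost, while the equivalent <4 x float> extract can often be
// a plain vmov and is costed at 1x.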
InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
// Thumb scalar code size cost for select.
if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT &&
ST->isThumb() && !ValTy->isVectorTy()) {
// Assume expensive structs.
if (TLI->getValueType(DL, ValTy, true) == MVT::Other)
return TTI::TCC_Expensive;
// Select costs can vary because they:
// - may require one or more conditional mov (including an IT),
// - can't operate directly on immediates,
// - require live flags, which we can't copy around easily.
InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first;
// Possible IT instruction for Thumb2, or more for Thumb1.
++Cost;
// i1 values may need rematerialising by using mov immediates and/or
// flag setting instructions.
if (ValTy->isIntegerTy(1))
++Cost;
return Cost;
}
// If this is a vector min/max/abs, use the cost of that intrinsic directly
// instead. Hopefully when min/max intrinsics are more prevalent this code
// will not be needed.
const Instruction *Sel = I;
if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel &&
Sel->hasOneUse())
Sel = cast<Instruction>(Sel->user_back());
if (Sel && ValTy->isVectorTy() &&
(ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) {
const Value *LHS, *RHS;
SelectPatternFlavor SPF = matchSelectPattern(Sel, LHS, RHS).Flavor;
unsigned IID = 0;
switch (SPF) {
case SPF_ABS:
IID = Intrinsic::abs;
break;
case SPF_SMIN:
IID = Intrinsic::smin;
break;
case SPF_SMAX:
IID = Intrinsic::smax;
break;
case SPF_UMIN:
IID = Intrinsic::umin;
break;
case SPF_UMAX:
IID = Intrinsic::umax;
break;
case SPF_FMINNUM:
IID = Intrinsic::minnum;
break;
case SPF_FMAXNUM:
IID = Intrinsic::maxnum;
break;
default:
break;
}
if (IID) {
// The ICmp is free, the select gets the cost of the min/max/etc
if (Sel != I)
return 0;
IntrinsicCostAttributes CostAttrs(IID, ValTy, {ValTy, ValTy});
return getIntrinsicInstrCost(CostAttrs, CostKind);
}
}
// On NEON a vector select gets lowered to vbsl.
if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
// Lowering of some vector selects is currently far from perfect.
static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
{ ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
};
EVT SelCondTy = TLI->getValueType(DL, CondTy);
EVT SelValTy = TLI->getValueType(DL, ValTy);
if (SelCondTy.isSimple() && SelValTy.isSimple()) {
if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
SelCondTy.getSimpleVT(),
SelValTy.getSimpleVT()))
return Entry->Cost;
}
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
return LT.first;
}
if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() &&
(Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
cast<FixedVectorType>(ValTy)->getNumElements() > 1) {
FixedVectorType *VecValTy = cast<FixedVectorType>(ValTy);
FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy);
if (!VecCondTy)
VecCondTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(VecValTy));
// If we don't have mve.fp, any fp operations will need to be scalarized.
if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) {
// One scalarization insert, one scalarization extract, and the cost of the
// fcmps.
return BaseT::getScalarizationOverhead(VecValTy, false, true) +
BaseT::getScalarizationOverhead(VecCondTy, true, false) +
VecValTy->getNumElements() *
getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
VecCondTy->getScalarType(), VecPred, CostKind,
I);
}
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
int BaseCost = ST->getMVEVectorCostFactor(CostKind);
// There are two types - the input that specifies the type of the compare
// and the output vXi1 type. Because we don't know how the output will be
// split, we may need an expensive shuffle to get two in sync. This has the
// effect of making larger than legal compares (v8i32 for example)
// expensive.
- if (LT.second.getVectorNumElements() > 2) {
+ if (LT.second.isVector() && LT.second.getVectorNumElements() > 2) {
if (LT.first > 1)
return LT.first * BaseCost +
BaseT::getScalarizationOverhead(VecCondTy, true, false);
return BaseCost;
}
}
// Default to cheap (throughput/size of 1 instruction) but adjust throughput
// for "multiple beats" potentially needed by MVE instructions.
int BaseCost = 1;
if (ST->hasMVEIntegerOps() && ValTy->isVectorTy())
BaseCost = ST->getMVEVectorCostFactor(CostKind);
return BaseCost *
BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
InstructionCost ARMTTIImpl::getAddressComputationCost(Type *Ty,
ScalarEvolution *SE,
const SCEV *Ptr) {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
unsigned NumVectorInstToHideOverhead = 10;
int MaxMergeDistance = 64;
if (ST->hasNEON()) {
if (Ty->isVectorTy() && SE &&
!BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
return NumVectorInstToHideOverhead;
// In many cases the address computation is not merged into the instruction
// addressing mode.
return 1;
}
return BaseT::getAddressComputationCost(Ty, SE, Ptr);
}
bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
// If a VCTP is part of a chain, it's already profitable and shouldn't be
// optimized, else LSR may block tail-predication.
switch (II->getIntrinsicID()) {
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
case Intrinsic::arm_mve_vctp64:
return true;
default:
break;
}
}
return false;
}
bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps())
return false;
if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) {
// Don't support v2i1 yet.
if (VecTy->getNumElements() == 2)
return false;
// We don't support extending fp types.
unsigned VecWidth = DataTy->getPrimitiveSizeInBits();
if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy())
return false;
}
unsigned EltWidth = DataTy->getScalarSizeInBits();
return (EltWidth == 32 && Alignment >= 4) ||
(EltWidth == 16 && Alignment >= 2) || (EltWidth == 8);
}
bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) {
if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps())
return false;
unsigned EltWidth = Ty->getScalarSizeInBits();
return ((EltWidth == 32 && Alignment >= 4) ||
(EltWidth == 16 && Alignment >= 2) || EltWidth == 8);
}
/// Given a memcpy/memset/memmove instruction, return the number of memory
/// operations performed, via querying findOptimalMemOpLowering. Returns -1 if a
/// call is used.
int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const {
MemOp MOp;
unsigned DstAddrSpace = ~0u;
unsigned SrcAddrSpace = ~0u;
const Function *F = I->getParent()->getParent();
if (const auto *MC = dyn_cast<MemTransferInst>(I)) {
ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength());
// If 'size' is not a constant, a library call will be generated.
if (!C)
return -1;
const unsigned Size = C->getValue().getZExtValue();
const Align DstAlign = *MC->getDestAlign();
const Align SrcAlign = *MC->getSourceAlign();
MOp = MemOp::Copy(Size, /*DstAlignCanChange*/ false, DstAlign, SrcAlign,
/*IsVolatile*/ false);
DstAddrSpace = MC->getDestAddressSpace();
SrcAddrSpace = MC->getSourceAddressSpace();
}
else if (const auto *MS = dyn_cast<MemSetInst>(I)) {
ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength());
// If 'size' is not a constant, a library call will be generated.
if (!C)
return -1;
const unsigned Size = C->getValue().getZExtValue();
const Align DstAlign = *MS->getDestAlign();
MOp = MemOp::Set(Size, /*DstAlignCanChange*/ false, DstAlign,
/*IsZeroMemset*/ false, /*IsVolatile*/ false);
DstAddrSpace = MS->getDestAddressSpace();
}
else
llvm_unreachable("Expected a memcpy/move or memset!");
unsigned Limit, Factor = 2;
switch(I->getIntrinsicID()) {
case Intrinsic::memcpy:
Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize());
break;
case Intrinsic::memmove:
Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize());
break;
case Intrinsic::memset:
Limit = TLI->getMaxStoresPerMemset(F->hasMinSize());
Factor = 1;
break;
default:
llvm_unreachable("Expected a memcpy/move or memset!");
}
// MemOps will be populated with a list of data types that need to be
// loaded and stored. That's why we multiply the number of entries by Factor
// to get the cost: a memcpy/memmove needs both a load and a store per
// entry, while a memset only needs stores.
std::vector<EVT> MemOps;
if (getTLI()->findOptimalMemOpLowering(
MemOps, Limit, MOp, DstAddrSpace,
SrcAddrSpace, F->getAttributes()))
return MemOps.size() * Factor;
// If we can't find an optimal memop lowering, return the default cost
return -1;
}
InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
int NumOps = getNumMemOps(cast<IntrinsicInst>(I));
// To model the cost of a library call, we assume 1 for the call, and
// 3 for the argument setup.
if (NumOps == -1)
return 4;
return NumOps;
}
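// Example (illustrative): assuming findOptimalMemOpLowering splits a
// 16-byte constant-size memcpy into two i64 memops, the cost is
// 2 * Factor(2) == 4; a memcpy with a non-constant length is modelled as a
// library call, also 4 (1 for the call plus 3 for argument setup).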
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasNEON()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry NEONDupTbl[] = {
// VDUP handles these cases.
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =
CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
}
if (Kind == TTI::SK_Reverse) {
static const CostTblEntry NEONShuffleTbl[] = {
// Reverse shuffle costs one instruction if we are shuffling within a
// double word (vrev) or two if we shuffle a quad word (vrev, vext).
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry =
CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
}
if (Kind == TTI::SK_Select) {
static const CostTblEntry NEONSelShuffleTbl[] = {
// Select shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
{ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
{ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
}
}
if (ST->hasMVEIntegerOps()) {
if (Kind == TTI::SK_Broadcast) {
static const CostTblEntry MVEDupTbl[] = {
// VDUP handles these cases.
{ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 1},
{ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
{ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}};
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE,
LT.second))
return LT.first * Entry->Cost *
ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput);
}
if (!Mask.empty()) {
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (LT.second.isVector() &&
Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
isVREVMask(Mask, LT.second, 64)))
return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first;
}
}
int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)
: 1;
return BaseCost * BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
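// Example (illustrative): a <4 x i32> broadcast is a single VDUP on both
// NEON and MVE; an MVE shuffle mask recognised by isVREVMask (e.g. swapping
// adjacent i16 lanes within each 32-bit chunk) costs one VREV32-style
// instruction times the MVE vector cost factor.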
InstructionCost ARMTTIImpl::getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info,
TTI::OperandValueProperties Opd1PropInfo,
TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
const Instruction *CxtI) {
int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) {
// Make operations on i1 relatively expensive as this often involves
// combining predicates. AND and XOR should be easier to handle with IT
// blocks.
switch (ISDOpcode) {
default:
break;
case ISD::AND:
case ISD::XOR:
return 2;
case ISD::OR:
return 3;
}
}
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
if (ST->hasNEON()) {
const unsigned FunctionCallDivCost = 20;
const unsigned ReciprocalDivCost = 10;
static const CostTblEntry CostTbl[] = {
// Division.
// These costs are somewhat random. Choose a cost of 20 to indicate that
// vectorizing division (with its added function call) is going to be very expensive.
// Double registers types.
{ ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
{ ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
{ ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
{ ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
{ ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
{ ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
{ ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
// Quad register types.
{ ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
{ ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
{ ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
{ ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
{ ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
// Multiplication.
};
if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
return LT.first * Entry->Cost;
InstructionCost Cost = BaseT::getArithmeticInstrCost(
Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo);
// This is somewhat of a hack. The problem that we are facing is that SROA
// creates a sequence of shift, and, or instructions to construct values.
// These sequences are recognized by the ISel and have zero-cost. Not so for
// the vectorized code. Because we have support for v2i64 but not i64 those
// sequences look particularly beneficial to vectorize.
// To work around this we increase the cost of v2i64 operations to make them
// seem less beneficial.
if (LT.second == MVT::v2i64 &&
Op2Info == TargetTransformInfo::OK_UniformConstantValue)
Cost += 4;
return Cost;
}
// If this operation is a shift on arm/thumb2, it might well be folded into
// the following instruction, hence having a cost of 0.
auto LooksLikeAFreeShift = [&]() {
if (ST->isThumb1Only() || Ty->isVectorTy())
return false;
if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift())
return false;
if (Op2Info != TargetTransformInfo::OK_UniformConstantValue)
return false;
// Folded into an ADC/ADD/AND/BIC/CMP/EOR/MVN/ORR/ORN/RSB/SBC/SUB
switch (cast<Instruction>(CxtI->user_back())->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::And:
case Instruction::Xor:
case Instruction::Or:
case Instruction::ICmp:
return true;
default:
return false;
}
};
if (LooksLikeAFreeShift())
return 0;
// Default to cheap (throughput/size of 1 instruction) but adjust throughput
// for "multiple beats" potentially needed by MVE instructions.
int BaseCost = 1;
if (ST->hasMVEIntegerOps() && Ty->isVectorTy())
BaseCost = ST->getMVEVectorCostFactor(CostKind);
// The rest of this mostly follows what is done in BaseT::getArithmeticInstrCost,
// without treating floats as more expensive than scalars or increasing the
// costs for custom operations. The result is also multiplied by the
// MVEVectorCostFactor where appropriate.
if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second))
return LT.first * BaseCost;
// Else this is expand, assume that we need to scalarize this op.
if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
unsigned Num = VTy->getNumElements();
InstructionCost Cost =
getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind);
// Return the cost of multiple scalar invocation plus the cost of
// inserting and extracting the values.
SmallVector<Type *> Tys(Args.size(), Ty);
return BaseT::getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
}
return BaseCost;
}
InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I) {
// TODO: Handle other cost kinds.
if (CostKind != TTI::TCK_RecipThroughput)
return 1;
// Type legalization can't handle structs
if (TLI->getValueType(DL, Src, true) == MVT::Other)
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
if (ST->hasNEON() && Src->isVectorTy() &&
(Alignment && *Alignment != Align(16)) &&
cast<VectorType>(Src)->getElementType()->isDoubleTy()) {
// Unaligned loads/stores are extremely inefficient.
// We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
return LT.first * 4;
}
// MVE can optimize a fpext(load(4xhalf)) using an extending integer load.
// Same for stores.
if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I &&
((Opcode == Instruction::Load && I->hasOneUse() &&
isa<FPExtInst>(*I->user_begin())) ||
(Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) {
FixedVectorType *SrcVTy = cast<FixedVectorType>(Src);
Type *DstTy =
Opcode == Instruction::Load
? (*I->user_begin())->getType()
: cast<Instruction>(I->getOperand(0))->getOperand(0)->getType();
if (SrcVTy->getNumElements() == 4 && SrcVTy->getScalarType()->isHalfTy() &&
DstTy->getScalarType()->isFloatTy())
return ST->getMVEVectorCostFactor(CostKind);
}
int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy()
? ST->getMVEVectorCostFactor(CostKind)
: 1;
return BaseCost * BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind, I);
}
InstructionCost
ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind) {
if (ST->hasMVEIntegerOps()) {
if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment))
return ST->getMVEVectorCostFactor(CostKind);
if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment))
return ST->getMVEVectorCostFactor(CostKind);
}
if (!isa<FixedVectorType>(Src))
return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
CostKind);
// Scalar cost, which is currently very high due to the inefficiency of the
// generated code.
return cast<FixedVectorType>(Src)->getNumElements() * 8;
}
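// Example (illustrative): a masked load of <4 x i32> with alignment 4 is
// legal for MVE and costs one MVE vector cost factor; a masked load of
// <8 x float> is rejected (fp types wider than 128 bits), so it is
// scalarized at 8 elements * 8 == 64.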
InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
assert(isa<VectorType>(VecTy) && "Expect a vector type");
// vldN/vstN doesn't support vector types with i64/f64 elements.
bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
!UseMaskForCond && !UseMaskForGaps) {
unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements();
auto *SubVecTy =
FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);
// vldN/vstN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
// matched to more than one vldN/vstN instruction.
int BaseCost =
ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1;
if (NumElts % Factor == 0 &&
TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL);
// Some smaller than legal interleaved patterns are cheap as we can make
// use of the vmovn or vrev patterns to interleave a standard load. This is
// true for v4i8, v8i8 and v4i16 at least (but not for v4f16 as it is
// promoted differently). The cost of 2 here is then a load and vrev or
// vmovn.
if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 &&
VecTy->isIntOrIntVectorTy() &&
DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64)
return 2 * BaseCost;
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
InstructionCost ARMTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
using namespace PatternMatch;
if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters)
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!");
auto *VTy = cast<FixedVectorType>(DataTy);
// TODO: Splitting, once we do that.
unsigned NumElems = VTy->getNumElements();
unsigned EltSize = VTy->getScalarSizeInBits();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy);
// For now, it is assumed that for the MVE gather instructions the loads are
// all effectively serialised. This means the cost is the scalar cost
// multiplied by the number of elements being loaded. This is possibly very
// conservative, but even so we still end up vectorising loops because the
// cost per iteration for many loops is lower than for scalar loops.
InstructionCost VectorCost =
NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
// The scalarization cost should be a lot higher. We use the number of vector
// elements plus the scalarization overhead.
InstructionCost ScalarCost =
NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, true, false) +
BaseT::getScalarizationOverhead(VTy, false, true);
if (EltSize < 8 || Alignment < EltSize / 8)
return ScalarCost;
unsigned ExtSize = EltSize;
// Check whether there's a single user that asks for an extended type
if (I != nullptr) {
// Depending on the caller of this function, a gather instruction will
// either have opcode Instruction::Load or be a call to the masked_gather
// intrinsic.
if ((I->getOpcode() == Instruction::Load ||
match(I, m_Intrinsic<Intrinsic::masked_gather>())) &&
I->hasOneUse()) {
const User *Us = *I->users().begin();
if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) {
// Only allow valid type combinations.
unsigned TypeSize =
cast<Instruction>(Us)->getType()->getScalarSizeInBits();
if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) ||
(TypeSize == 16 && EltSize == 8)) &&
TypeSize * NumElems == 128) {
ExtSize = TypeSize;
}
}
}
// Check whether the input data needs to be truncated
TruncInst *T;
if ((I->getOpcode() == Instruction::Store ||
match(I, m_Intrinsic<Intrinsic::masked_scatter>())) &&
(T = dyn_cast<TruncInst>(I->getOperand(0)))) {
// Only allow valid type combinations
unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits();
if (((EltSize == 16 && TypeSize == 32) ||
(EltSize == 8 && (TypeSize == 32 || TypeSize == 16))) &&
TypeSize * NumElems == 128)
ExtSize = TypeSize;
}
}
if (ExtSize * NumElems != 128 || NumElems < 4)
return ScalarCost;
// Any (aligned) i32 gather will not need to be scalarised.
if (ExtSize == 32)
return VectorCost;
// For smaller types, we need to ensure that the gep's inputs are correctly
// extended from a small enough value. Other sizes (including i64) are
// scalarized for now.
if (ExtSize != 8 && ExtSize != 16)
return ScalarCost;
if (const auto *BC = dyn_cast<BitCastInst>(Ptr))
Ptr = BC->getOperand(0);
if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
if (GEP->getNumOperands() != 2)
return ScalarCost;
unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType());
// Scale needs to be correct (which is only relevant for i16s).
if (Scale != 1 && Scale * 8 != ExtSize)
return ScalarCost;
// And we need to zext (not sext) the indexes from a small enough type.
if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) {
if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize)
return VectorCost;
}
return ScalarCost;
}
return ScalarCost;
}
InstructionCost
ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF))
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
EVT ValVT = TLI->getValueType(DL, ValTy);
int ISD = TLI->InstructionOpcodeToISD(Opcode);
if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD)
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
static const CostTblEntry CostTblAdd[]{
{ISD::ADD, MVT::v16i8, 1},
{ISD::ADD, MVT::v8i16, 1},
{ISD::ADD, MVT::v4i32, 1},
};
if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second))
return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first;
return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
}
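// Example (illustrative): vecreduce.add on <4 x i32> maps to a single
// VADDV (cost 1 times the MVE vector cost factor); <8 x i32> legalizes to
// two v4i32 halves (LT.first == 2) and costs twice that.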
InstructionCost
ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
TTI::TargetCostKind CostKind) {
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
// The legal cases are:
// VADDV u/s 8/16/32
// VMLAV u/s 8/16/32
// VADDLV u/s 32
// VMLALV u/s 16/32
// Codegen currently cannot always handle larger than legal vectors very
// well, especially for predicated reductions where the mask needs to be
// split, so restrict to 128-bit or smaller input types.
unsigned RevVTSize = ResVT.getSizeInBits();
if (ValVT.getSizeInBits() <= 128 &&
((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
(LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) ||
(LT.second == MVT::v4i32 && RevVTSize <= 64)))
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
}
return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy,
CostKind);
}
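// Example (illustrative): an extending reduction such as
// vecreduce.add(zext <8 x i16> %v to <8 x i32>) with an i32 result fits the
// v8i16 case above and is costed as one VADDV/VMLAV-style operation rather
// than a separate extend plus reduction.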
InstructionCost
ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
switch (ICA.getID()) {
case Intrinsic::get_active_lane_mask:
// Currently we make a somewhat optimistic assumption that
// active_lane_masks are always free. In reality one may be freely folded
// into a tail predicated loop, expanded into a VCTP or expanded into a lot
// of add/icmp code. We may need to improve this in the future, but being
// able to detect if it is free or not involves looking at a lot of other
// code. We currently assume that the vectorizer inserted these, and knew
// what it was doing in adding one.
if (ST->hasMVEIntegerOps())
return 0;
break;
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
case Intrinsic::usub_sat: {
if (!ST->hasMVEIntegerOps())
break;
Type *VT = ICA.getReturnType();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8) {
// This is a base cost of 1 for the vqadd, plus 3 extra shifts if we
// need to extend the type, as it uses shr(qadd(shl, shl)).
unsigned Instrs =
LT.second.getScalarSizeInBits() == VT->getScalarSizeInBits() ? 1 : 4;
return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs;
}
break;
}
case Intrinsic::abs:
case Intrinsic::smin:
case Intrinsic::smax:
case Intrinsic::umin:
case Intrinsic::umax: {
if (!ST->hasMVEIntegerOps())
break;
Type *VT = ICA.getReturnType();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
LT.second == MVT::v16i8)
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;
}
case Intrinsic::minnum:
case Intrinsic::maxnum: {
if (!ST->hasMVEFloatOps())
break;
Type *VT = ICA.getReturnType();
std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT);
if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16)
return LT.first * ST->getMVEVectorCostFactor(CostKind);
break;
}
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat: {
if (ICA.getArgTypes().empty())
break;
bool IsSigned = ICA.getID() == Intrinsic::fptosi_sat;
auto LT = TLI->getTypeLegalizationCost(DL, ICA.getArgTypes()[0]);
EVT MTy = TLI->getValueType(DL, ICA.getReturnType());
// Check for the legal types, with the correct subtarget features.
if ((ST->hasVFP2Base() && LT.second == MVT::f32 && MTy == MVT::i32) ||
(ST->hasFP64() && LT.second == MVT::f64 && MTy == MVT::i32) ||
(ST->hasFullFP16() && LT.second == MVT::f16 && MTy == MVT::i32))
return LT.first;
// Equally for MVE vector types
if (ST->hasMVEFloatOps() &&
(LT.second == MVT::v4f32 || LT.second == MVT::v8f16) &&
LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())
return LT.first * ST->getMVEVectorCostFactor(CostKind);
// Otherwise we use a legal convert followed by a min+max
if (((ST->hasVFP2Base() && LT.second == MVT::f32) ||
(ST->hasFP64() && LT.second == MVT::f64) ||
(ST->hasFullFP16() && LT.second == MVT::f16) ||
(ST->hasMVEFloatOps() &&
(LT.second == MVT::v4f32 || LT.second == MVT::v8f16))) &&
LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) {
Type *LegalTy = Type::getIntNTy(ICA.getReturnType()->getContext(),
LT.second.getScalarSizeInBits());
InstructionCost Cost =
LT.second.isVector() ? ST->getMVEVectorCostFactor(CostKind) : 1;
IntrinsicCostAttributes Attrs1(IsSigned ? Intrinsic::smin
: Intrinsic::umin,
LegalTy, {LegalTy, LegalTy});
Cost += getIntrinsicInstrCost(Attrs1, CostKind);
IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax
: Intrinsic::umax,
LegalTy, {LegalTy, LegalTy});
Cost += getIntrinsicInstrCost(Attrs2, CostKind);
return LT.first * Cost;
}
break;
}
}
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
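// Examples (illustrative): llvm.sadd.sat on <4 x i32> is a single VQADD and
// costs one MVE vector cost factor; on <4 x i16> (promoted to v4i32) it
// needs the shr(qadd(shl, shl)) sequence, costing 4 instructions.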
bool ARMTTIImpl::isLoweredToCall(const Function *F) {
if (!F->isIntrinsic())
return BaseT::isLoweredToCall(F);
// Assume all Arm-specific intrinsics map to an instruction.
if (F->getName().startswith("llvm.arm"))
return false;
switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::powi:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::pow:
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
case Intrinsic::exp:
case Intrinsic::exp2:
return true;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::copysign:
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::trunc:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
case Intrinsic::canonicalize:
case Intrinsic::lround:
case Intrinsic::llround:
case Intrinsic::lrint:
case Intrinsic::llrint:
if (F->getReturnType()->isDoubleTy() && !ST->hasFP64())
return true;
if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16())
return true;
// Some operations can be handled by vector instructions, and we assume
// unsupported vectors will be expanded into supported scalar ones.
// TODO Handle scalar operations properly.
return !ST->hasFPARMv8Base() && !ST->hasVFP2Base();
case Intrinsic::masked_store:
case Intrinsic::masked_load:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter:
return !ST->hasMVEIntegerOps();
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow:
case Intrinsic::ssub_with_overflow:
case Intrinsic::usub_with_overflow:
case Intrinsic::sadd_sat:
case Intrinsic::uadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::usub_sat:
return false;
}
return BaseT::isLoweredToCall(F);
}
bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) {
unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode());
EVT VT = TLI->getValueType(DL, I.getType(), true);
if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall)
return true;
// Check if an intrinsic will be lowered to a call and assume that any
// other CallInst will generate a bl.
if (auto *Call = dyn_cast<CallInst>(&I)) {
if (auto *II = dyn_cast<IntrinsicInst>(Call)) {
switch(II->getIntrinsicID()) {
case Intrinsic::memcpy:
case Intrinsic::memset:
case Intrinsic::memmove:
return getNumMemOps(II) == -1;
default:
if (const Function *F = Call->getCalledFunction())
return isLoweredToCall(F);
}
}
return true;
}
// FPv5 provides conversions between integer, double-precision,
// single-precision, and half-precision formats.
switch (I.getOpcode()) {
default:
break;
case Instruction::FPToSI:
case Instruction::FPToUI:
case Instruction::SIToFP:
case Instruction::UIToFP:
case Instruction::FPTrunc:
case Instruction::FPExt:
return !ST->hasFPARMv8Base();
}
// FIXME: Unfortunately the approach of checking the Operation Action does
// not catch all cases of Legalization that use library calls. Our
// Legalization step categorizes some transformations into library calls as
// Custom, Expand or even Legal when doing type legalization. So for now
// we have to special-case, for instance, the SDIV of 64-bit integers and
// the use of floating point emulation.
if (VT.isInteger() && VT.getSizeInBits() >= 64) {
switch (ISD) {
default:
break;
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
case ISD::UREM:
case ISD::SDIVREM:
case ISD::UDIVREM:
return true;
}
}
// Assume all other non-float operations are supported.
if (!VT.isFloatingPoint())
return false;
// We'll need a library call to handle most floats when using soft float.
if (TLI->useSoftFloat()) {
switch (I.getOpcode()) {
default:
return true;
case Instruction::Alloca:
case Instruction::Load:
case Instruction::Store:
case Instruction::Select:
case Instruction::PHI:
return false;
}
}
// We'll need a libcall to perform double precision operations on a single
// precision only FPU.
if (I.getType()->isDoubleTy() && !ST->hasFP64())
return true;
// Likewise for half precision arithmetic.
if (I.getType()->isHalfTy() && !ST->hasFullFP16())
return true;
return false;
}
bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) {
// Low-overhead branches are only supported in the 'low-overhead branch'
// extension of v8.1-m.
if (!ST->hasLOB() || DisableLowOverheadLoops) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Disabled\n");
return false;
}
if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: No BETC\n");
return false;
}
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n");
return false;
}
const SCEV *TripCountSCEV =
SE.getAddExpr(BackedgeTakenCount,
SE.getOne(BackedgeTakenCount->getType()));
// We need to store the trip count in LR, a 32-bit register.
if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32bits\n");
return false;
}
// Making a call will trash LR and clear LO_BRANCH_INFO, so there's little
// point in generating a hardware loop if that's going to happen.
auto IsHardwareLoopIntrinsic = [](Instruction &I) {
if (auto *Call = dyn_cast<IntrinsicInst>(&I)) {
switch (Call->getIntrinsicID()) {
default:
break;
case Intrinsic::start_loop_iterations:
case Intrinsic::test_start_loop_iterations:
case Intrinsic::loop_decrement:
case Intrinsic::loop_decrement_reg:
return true;
}
}
return false;
};
// Scan the instructions to see if there's any that we know will turn into a
// call or if this loop is already a low-overhead loop or will become a tail
// predicated loop.
bool IsTailPredLoop = false;
auto ScanLoop = [&](Loop *L) {
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) ||
isa<InlineAsm>(I)) {
LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n");
return false;
}
if (auto *II = dyn_cast<IntrinsicInst>(&I))
IsTailPredLoop |=
II->getIntrinsicID() == Intrinsic::get_active_lane_mask ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 ||
II->getIntrinsicID() == Intrinsic::arm_mve_vctp64;
}
}
return true;
};
// Visit inner loops.
for (auto Inner : *L)
if (!ScanLoop(Inner))
return false;
if (!ScanLoop(L))
return false;
// TODO: Check whether the trip count calculation is expensive. If L is the
// inner loop but we know it has a low trip count, calculating that trip
// count (in the parent loop) may be detrimental.
LLVMContext &C = L->getHeader()->getContext();
HWLoopInfo.CounterInReg = true;
HWLoopInfo.IsNestingLegal = false;
HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop;
HWLoopInfo.CountType = Type::getInt32Ty(C);
HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
return true;
}
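// A rough sketch of the hardware-loop form that isHardwareLoopProfitable
// enables (illustrative IR, not emitted by this function itself):
//
//   %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
//   ...
//   %rem = call i32 @llvm.loop.decrement.reg.i32(i32 %iv, i32 1)
//   %cmp = icmp ne i32 %rem, 0
//   br i1 %cmp, label %loop, label %exit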
static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
// We don't allow icmp's, and because we only look at single block loops,
// we simply count the icmps, i.e. there should only be 1 for the backedge.
if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
return false;
  // FIXME: This is a workaround for poor cost modelling. Min/Max intrinsics are
  // not currently canonical, but soon will be. Code without them uses icmp, and
  // so is not tail predicated as per the condition above. In order to get the
  // same performance we treat min and max the same as an icmp for tailpred
  // purposes for the moment (we often rely on non-tailpred and higher VFs to
  // pick more optimal instructions like VQDMULH. They need to be recognized
  // directly by the vectorizer).
if (auto *II = dyn_cast<IntrinsicInst>(&I))
if ((II->getIntrinsicID() == Intrinsic::smin ||
II->getIntrinsicID() == Intrinsic::smax ||
II->getIntrinsicID() == Intrinsic::umin ||
II->getIntrinsicID() == Intrinsic::umax) &&
++ICmpCount > 1)
return false;
if (isa<FCmpInst>(&I))
return false;
// We could allow extending/narrowing FP loads/stores, but codegen is
// too inefficient so reject this for now.
if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I))
return false;
// Extends have to be extending-loads
  if (isa<SExtInst>(&I) || isa<ZExtInst>(&I))
if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0)))
return false;
// Truncs have to be narrowing-stores
  if (isa<TruncInst>(&I))
if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin()))
return false;
return true;
}
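// For example (illustrative), canTailPredicateInstruction only accepts a
// sign/zero-extend when it is the sole user of a load:
//
//   %w = load i16, i16* %p
//   %e = sext i16 %w to i32    ; allowed: this is an extending load
//
// whereas extending a value that has other users, or one that is not
// loaded, is rejected.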
// To set up a tail-predicated loop, we need to know the total number of
// elements processed by that loop. Thus, we need to determine the element
// size and:
// 1) it should be uniform for all operations in the vector loop, so we
// e.g. don't want any widening/narrowing operations.
// 2) it should be smaller than i64s because we don't have vector operations
// that work on i64s.
// 3) we don't want elements to be reversed or shuffled, to make sure the
// tail-predication masks/predicates the right lanes.
//
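// As an illustrative example, a loop such as
//
//   C: for (i = 0; i < N; i++) c[i] = a[i] + b[i];   // all i32
//
// has a uniform 32-bit element size and unit-stride accesses, so it is a
// candidate, whereas a loop mixing i32 loads with i64 arithmetic, or one
// reading an array in reverse, is not.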
static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
const DataLayout &DL,
const LoopAccessInfo *LAI) {
LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n");
// If there are live-out values, it is probably a reduction. We can predicate
// most reduction operations freely under MVE using a combination of
// prefer-predicated-reduction-select and inloop reductions. We limit this to
// floating point and integer reductions, but don't check for operators
// specifically here. If the value ends up not being a reduction (and so the
// vectorizer cannot tailfold the loop), we should fall back to standard
// vectorization automatically.
  SmallVector<Instruction *, 8> LiveOuts = llvm::findDefsUsedOutsideOfLoop(L);
bool ReductionsDisabled =
EnableTailPredication == TailPredication::EnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabledNoReductions;
for (auto *I : LiveOuts) {
if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() &&
!I->getType()->isHalfTy()) {
LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float "
"live-out value\n");
return false;
}
if (ReductionsDisabled) {
LLVM_DEBUG(dbgs() << "Reductions not enabled\n");
return false;
}
}
// Next, check that all instructions can be tail-predicated.
PredicatedScalarEvolution PSE = LAI->getPSE();
SmallVector<Instruction *, 16> LoadStores;
int ICmpCount = 0;
for (BasicBlock *BB : L->blocks()) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
if (isa<PHINode>(&I))
continue;
if (!canTailPredicateInstruction(I, ICmpCount)) {
LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump());
return false;
}
Type *T = I.getType();
if (T->getScalarSizeInBits() > 32) {
LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump());
return false;
}
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
Value *Ptr = getLoadStorePointerOperand(&I);
Type *AccessTy = getLoadStoreType(&I);
int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
if (NextStride == 1) {
        // TODO: for now only allow consecutive strides of 1. We could support
        // other strides as long as they are uniform, but let's keep it simple
        // for now.
continue;
} else if (NextStride == -1 ||
(NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) ||
(NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) {
        LLVM_DEBUG(dbgs()
                   << "Consecutive stride of -1, 2 or 4 found, vld2/vst2 can't "
                      "be tail-predicated.\n");
return false;
// TODO: don't tail predicate if there is a reversed load?
} else if (EnableMaskedGatherScatters) {
// Gather/scatters do allow loading from arbitrary strides, at
// least if they are loop invariant.
// TODO: Loop variant strides should in theory work, too, but
// this requires further testing.
const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
if (PSE.getSE()->isLoopInvariant(Step, L))
continue;
}
}
LLVM_DEBUG(dbgs() << "Bad stride found, can't "
"tail-predicate\n.");
return false;
}
}
}
LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n");
return true;
}
bool ARMTTIImpl::preferPredicateOverEpilogue(
Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL) {
if (!EnableTailPredication) {
LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;
}
// Creating a predicated vector loop is the first step for generating a
// tail-predicated hardware loop, for which we need the MVE masked
// load/stores instructions:
if (!ST->hasMVEIntegerOps())
return false;
// For now, restrict this to single block loops.
if (L->getNumBlocks() > 1) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
"loop.\n");
return false;
}
assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"analyzable.\n");
return false;
}
// This checks if we have the low-overhead branch architecture
// extension, and if we will create a hardware-loop:
if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"profitable.\n");
return false;
}
if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"a candidate.\n");
return false;
}
return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());
}
PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const {
if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
return PredicationStyle::None;
// Intrinsic @llvm.get.active.lane.mask is supported.
// It is used in the MVETailPredication pass, which requires the number of
// elements processed by this vector loop to setup the tail-predicated
// loop.
return PredicationStyle::Data;
}
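// The intrinsic computes the mask of still-active lanes from the element
// index and the trip count, roughly (illustrative; the lane count and
// operand types depend on the vector loop):
//
//   IR: %m = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %idx, i32 %n)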
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) {
  // Enable upper bound unrolling universally, not dependent upon the conditions
  // below.
UP.UpperBound = true;
// Only currently enable these preferences for M-Class cores.
if (!ST->isMClass())
return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
UP.PartialOptSizeThreshold = 0;
if (L->getHeader()->getParent()->hasOptSize())
return;
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
LLVM_DEBUG(dbgs() << "Loop has:\n"
<< "Blocks: " << L->getNumBlocks() << "\n"
<< "Exit blocks: " << ExitingBlocks.size() << "\n");
  // Allow at most one exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
if (ExitingBlocks.size() > 2)
return;
// Limit the CFG of the loop body for targets with a branch predictor.
// Allowing 4 blocks permits if-then-else diamonds in the body.
if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
return;
// Don't unroll vectorized loops, including the remainder loop
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
return;
// Scan the loop: don't unroll loops with calls as this could prevent
// inlining.
InstructionCost Cost = 0;
for (auto *BB : L->getBlocks()) {
for (auto &I : *BB) {
      // Don't unroll vectorised loops. MVE does not benefit from unrolling as
      // much as scalar code does.
if (I.getType()->isVectorTy())
return;
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
if (!isLoweredToCall(F))
continue;
}
return;
}
SmallVector<const Value*, 4> Operands(I.operand_values());
Cost +=
getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
}
}
  // On v6m cores, there are very few registers available. We can easily end up
  // spilling and reloading more registers in an unrolled loop. Look at the
  // number of LCSSA phis as a rough measure of how many registers will need to
  // be live out of the loop, reducing the default unroll count if more than 1
  // value is needed. In the long run, all of this should be learned by a
  // machine.
unsigned UnrollCount = 4;
if (ST->isThumb1Only()) {
unsigned ExitingValues = 0;
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
for (auto *Exit : ExitBlocks) {
      // Count the number of LCSSA phis. Exclude values coming from GEPs, as
      // only the last is expected to be needed for address operands.
unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) {
return PH.getNumOperands() != 1 ||
!isa<GetElementPtrInst>(PH.getOperand(0));
});
ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues;
}
if (ExitingValues)
UnrollCount /= ExitingValues;
if (UnrollCount <= 1)
return;
}
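  // E.g. (illustrative): with 4 exiting values the count becomes 4 / 4 == 1
  // and we give up, while with 2 it becomes 2 and we still partially unroll.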
LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
LLVM_DEBUG(dbgs() << "Default Runtime Unroll Count: " << UnrollCount << "\n");
UP.Partial = true;
UP.Runtime = true;
UP.UnrollRemainder = true;
UP.DefaultUnrollRuntimeCount = UnrollCount;
UP.UnrollAndJam = true;
UP.UnrollAndJamInnerLoopThreshold = 60;
  // Force-unrolling small loops can be very useful because of the branch-taken
  // cost of the backedge.
if (Cost < 12)
UP.Force = true;
}
void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP) {
BaseT::getPeelingPreferences(L, SE, PP);
}
bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty,
TTI::ReductionFlags Flags) const {
if (!ST->hasMVEIntegerOps())
return false;
unsigned ScalarBits = Ty->getScalarSizeInBits();
switch (Opcode) {
case Instruction::Add:
return ScalarBits <= 64;
default:
return false;
}
}
bool ARMTTIImpl::preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const {
if (!ST->hasMVEIntegerOps())
return false;
return true;
}
diff --git a/llvm/lib/Target/Sparc/SparcCallingConv.td b/llvm/lib/Target/Sparc/SparcCallingConv.td
index e6d23f741ea5..8afd0a7fc09a 100644
--- a/llvm/lib/Target/Sparc/SparcCallingConv.td
+++ b/llvm/lib/Target/Sparc/SparcCallingConv.td
@@ -1,143 +1,147 @@
//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the Sparc architectures.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// SPARC v8 32-bit.
//===----------------------------------------------------------------------===//
def CC_Sparc32 : CallingConv<[
// Custom assign SRet to [sp+64].
CCIfSRet<CCCustom<"CC_Sparc_Assign_SRet">>,
// i32 f32 arguments get passed in integer registers if there is space.
CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
// f64 arguments are split and passed through registers or through stack.
CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>,
// As are v2i32 arguments (this would be the default behavior for
// v2i32 if it wasn't allocated to the IntPair register-class)
CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>,
// Alternatively, they are assigned to the stack in 4-byte aligned units.
CCAssignToStack<4, 4>
]>;
def RetCC_Sparc32 : CallingConv<[
CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1]>>,
CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">>
]>;
//===----------------------------------------------------------------------===//
// SPARC v9 64-bit.
//===----------------------------------------------------------------------===//
//
// The 64-bit ABI conceptually assigns all function arguments to a parameter
// array starting at [%fp+BIAS+128] in the callee's stack frame. All arguments
// occupy a multiple of 8 bytes in the array. Integer arguments are extended to
// 64 bits by the caller. Floats are right-aligned in their 8-byte slot; the
// first 4 bytes in the slot are undefined.
//
// The integer registers %i0 to %i5 shadow the first 48 bytes of the parameter
// array at fixed offsets. Integer arguments are promoted to registers when
// possible.
//
// The floating point registers %f0 to %f31 shadow the first 128 bytes of the
// parameter array at fixed offsets. Float and double parameters are promoted
// to these registers when possible.
//
// Structs up to 16 bytes in size are passed by value. They are right-aligned
// in one or two 8-byte slots in the parameter array. Struct members are
// promoted to both floating point and integer registers when possible. A
// struct containing two floats would thus be passed in %f0 and %f1, while two
// float function arguments would occupy 8 bytes each, and be passed in %f1 and
// %f3.
//
// When a struct { int, float } is passed by value, the int goes in the high
// bits of an integer register while the float goes in a floating point
// register.
//
// The difference is encoded in LLVM IR using the inreg attribute on function
// arguments:
//
// C: void f(float, float);
// IR: declare void f(float %f1, float %f3)
//
// C: void f(struct { float f0, f1; });
// IR: declare void f(float inreg %f0, float inreg %f1)
//
// C: void f(int, float);
// IR: declare void f(int signext %i0, float %f3)
//
// C: void f(struct { int i0high; float f1; });
// IR: declare void f(i32 inreg %i0high, float inreg %f1)
//
// Two ints in a struct are simply coerced to i64:
//
// C: void f(struct { int i0high, i0low; });
// IR: declare void f(i64 %i0.coerced)
//
// The frontend and backend divide the task of producing ABI compliant code for
// C functions. The C frontend will:
//
// - Annotate integer arguments with zeroext or signext attributes.
//
// - Split structs into one or two 64-bit sized chunks, or 32-bit chunks with
// inreg attributes.
//
// - Pass structs larger than 16 bytes indirectly with an explicit pointer
// argument. The byval attribute is not used.
//
// The backend will:
//
// - Assign all arguments to 64-bit aligned stack slots, 32-bits for inreg.
//
// - Promote to integer or floating point registers depending on type.
//
// Function return values are passed exactly like function arguments, except a
// struct up to 32 bytes in size can be returned in registers.
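//
// An illustrative example of the indirect case (hypothetical types):
//
// C: void f(struct { char buf[24]; });
// IR: declare void f(%struct.anon* %indirect) ; pointer argument, no byval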
// Function arguments AND most return values.
def CC_Sparc64 : CallingConv<[
// The frontend uses the inreg flag to indicate i32 and float arguments from
// structs. These arguments are not promoted to 64 bits, but they can still
// be assigned to integer and float registers.
CCIfInReg<CCIfType<[i32, f32], CCCustom<"CC_Sparc64_Half">>>,
// All integers are promoted to i64 by the caller.
CCIfType<[i32], CCPromoteToType<i64>>,
// Custom assignment is required because stack space is reserved for all
// arguments whether they are passed in registers or not.
CCCustom<"CC_Sparc64_Full">
]>;
def RetCC_Sparc64 : CallingConv<[
// A single f32 return value always goes in %f0. The ABI doesn't specify what
// happens to multiple f32 return values outside a struct.
- CCIfType<[f32], CCCustom<"CC_Sparc64_Half">>,
+ CCIfType<[f32], CCCustom<"RetCC_Sparc64_Half">>,
- // Otherwise, return values are passed exactly like arguments.
- CCDelegateTo<CC_Sparc64>
+ // Otherwise, return values are passed exactly like arguments, except that
+ // return values too big to fit into the registers are passed as an sret
+ // instead.
+ CCIfInReg<CCIfType<[i32, f32], CCCustom<"RetCC_Sparc64_Half">>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCCustom<"RetCC_Sparc64_Full">
]>;
// Callee-saved registers are handled by the register window mechanism.
def CSR : CalleeSavedRegs<(add)> {
let OtherPreserved = (add (sequence "I%u", 0, 7),
(sequence "L%u", 0, 7), O6);
}
// Callee-saved registers for calls with ReturnsTwice attribute.
def RTCSR : CalleeSavedRegs<(add)> {
let OtherPreserved = (add I6, I7);
}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2cb74e7709c7..f55675089102 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,3513 +1,3562 @@
//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Sparc uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "SparcISelLowering.h"
#include "MCTargetDesc/SparcMCExpr.h"
#include "SparcMachineFunctionInfo.h"
#include "SparcRegisterInfo.h"
#include "SparcTargetMachine.h"
#include "SparcTargetObjectFile.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
  assert(ArgFlags.isSRet());
// Assign SRet argument.
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
0,
LocVT, LocInfo));
return true;
}
static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
if (Register Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
    // Assign the whole thing to the stack.
State.addLoc(CCValAssign::getCustomMem(
ValNo, ValVT, State.AllocateStack(8, Align(4)), LocVT, LocInfo));
return true;
}
// Try to get second reg.
if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
State.addLoc(CCValAssign::getCustomMem(
ValNo, ValVT, State.AllocateStack(4, Align(4)), LocVT, LocInfo));
return true;
}
static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;
// Try to get second reg.
if (Register Reg = State.AllocateReg(RegList))
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
else
return false;
return true;
}
// Allocate a full-sized argument for the 64-bit ABI.
-static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+static bool Analyze_CC_Sparc64_Full(bool IsReturn, unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert((LocVT == MVT::f32 || LocVT == MVT::f128
|| LocVT.getSizeInBits() == 64) &&
"Can't handle non-64 bits locations");
// Stack space is allocated for all arguments starting from [%fp+BIAS+128].
unsigned size = (LocVT == MVT::f128) ? 16 : 8;
Align alignment = (LocVT == MVT::f128) ? Align(16) : Align(8);
unsigned Offset = State.AllocateStack(size, alignment);
unsigned Reg = 0;
if (LocVT == MVT::i64 && Offset < 6*8)
// Promote integers to %i0-%i5.
Reg = SP::I0 + Offset/8;
else if (LocVT == MVT::f64 && Offset < 16*8)
// Promote doubles to %d0-%d30. (Which LLVM calls D0-D15).
Reg = SP::D0 + Offset/8;
else if (LocVT == MVT::f32 && Offset < 16*8)
// Promote floats to %f1, %f3, ...
Reg = SP::F1 + Offset/4;
else if (LocVT == MVT::f128 && Offset < 16*8)
// Promote long doubles to %q0-%q28. (Which LLVM calls Q0-Q7).
Reg = SP::Q0 + Offset/16;
// Promote to register when possible, otherwise use the stack slot.
if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return true;
}
+ // Bail out if this is a return CC and we run out of registers to place
+ // values into.
+ if (IsReturn)
+ return false;
+
// This argument goes on the stack in an 8-byte slot.
// When passing floats, LocVT is smaller than 8 bytes. Adjust the offset to
// the right-aligned float. The first 4 bytes of the stack slot are undefined.
if (LocVT == MVT::f32)
Offset += 4;
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
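// Worked example (illustrative): the third 8-byte slot has Offset == 16, so
// an i64 there is assigned %i2 (I0 + 16/8), an f64 %d2 (D0 + 16/8), and an
// f32 %f5 (F1 + 16/4).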
// Allocate a half-sized argument for the 64-bit ABI.
//
// This is used when passing { float, int } structs by value in registers.
-static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT, CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+static bool Analyze_CC_Sparc64_Half(bool IsReturn, unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT, CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
assert(LocVT.getSizeInBits() == 32 && "Can't handle non-32 bits locations");
unsigned Offset = State.AllocateStack(4, Align(4));
if (LocVT == MVT::f32 && Offset < 16*8) {
// Promote floats to %f0-%f31.
State.addLoc(CCValAssign::getReg(ValNo, ValVT, SP::F0 + Offset/4,
LocVT, LocInfo));
return true;
}
if (LocVT == MVT::i32 && Offset < 6*8) {
// Promote integers to %i0-%i5, using half the register.
unsigned Reg = SP::I0 + Offset/8;
LocVT = MVT::i64;
LocInfo = CCValAssign::AExt;
// Set the Custom bit if this i32 goes in the high bits of a register.
if (Offset % 8 == 0)
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg,
LocVT, LocInfo));
else
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return true;
}
+ // Bail out if this is a return CC and we run out of registers to place
+ // values into.
+ if (IsReturn)
+ return false;
+
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
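// Worked example (illustrative): for struct { int i; float f; } the int
// occupies the first 4-byte slot (Offset == 0, a multiple of 8), so it is
// marked Custom and placed in the high half of %i0, while the float at
// Offset == 4 is promoted to %f1 (F0 + 4/4).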
+static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Full(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static bool CC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Half(false, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static bool RetCC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Full(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
+static bool RetCC_Sparc64_Half(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ return Analyze_CC_Sparc64_Half(true, ValNo, ValVT, LocVT, LocInfo, ArgFlags,
+ State);
+}
+
#include "SparcGenCallingConv.inc"
// The calling conventions in SparcCallingConv.td are described in terms of the
// callee's register window. This function translates registers to the
// corresponding caller window %o register.
static unsigned toCallerWindow(unsigned Reg) {
static_assert(SP::I0 + 7 == SP::I7 && SP::O0 + 7 == SP::O7,
"Unexpected enum");
if (Reg >= SP::I0 && Reg <= SP::I7)
return Reg - SP::I0 + SP::O0;
return Reg;
}
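// E.g. toCallerWindow(SP::I2) == SP::O2: the callee's %i2 is the same
// physical register that the caller wrote as %o2 before SAVE rotated the
// register window.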
+bool SparcTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, Subtarget->is64Bit() ? RetCC_Sparc64
+ : RetCC_Sparc32);
+}
+
SDValue
SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
if (Subtarget->is64Bit())
return LowerReturn_64(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
return LowerReturn_32(Chain, CallConv, IsVarArg, Outs, OutVals, DL, DAG);
}
SDValue
SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Make room for the return address offset.
RetOps.push_back(SDValue());
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Arg = OutVals[realRVLocIdx];
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::v2i32);
// Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would
// happen by default if this wasn't a legal type)
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Arg,
DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout())));
SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
Arg,
DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
Flag);
} else
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
unsigned RetAddrOffset = 8; // Call Inst + Delay Slot
// If the function returns a struct, copy the SRetReturnReg to I0
if (MF.getFunction().hasStructRetAttr()) {
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
Register Reg = SFI->getSRetReturnReg();
if (!Reg)
llvm_unreachable("sret virtual register not created in the entry block");
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, PtrVT);
Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(SP::I0, PtrVT));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
RetOps[0] = Chain; // Update chain.
RetOps[1] = DAG.getConstant(RetAddrOffset, DL, MVT::i32);
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
}
// Lower return values for the 64-bit ABI.
// Return values are passed exactly the same way as function arguments.
SDValue
SparcTargetLowering::LowerReturn_64(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc64);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
// The second operand on the return instruction is the return address offset.
// The return address is always %i7+8 with the 64-bit ABI.
RetOps.push_back(DAG.getConstant(8, DL, MVT::i32));
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue OutVal = OutVals[i];
// Integer return values must be sign or zero extended by the callee.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt:
OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
break;
case CCValAssign::ZExt:
OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
break;
case CCValAssign::AExt:
OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
break;
default:
llvm_unreachable("Unknown loc info!");
}
// The custom bit on an i32 return value indicates that it should be passed
// in the high bits of the register.
if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
OutVal = DAG.getNode(ISD::SHL, DL, MVT::i64, OutVal,
DAG.getConstant(32, DL, MVT::i32));
// The next value may go in the low bits of the same register.
// Handle both at once.
if (i+1 < RVLocs.size() && RVLocs[i+1].getLocReg() == VA.getLocReg()) {
SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, OutVals[i+1]);
OutVal = DAG.getNode(ISD::OR, DL, MVT::i64, OutVal, NV);
// Skip the next value, it's already done.
++i;
}
}
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
if (Flag.getNode())
RetOps.push_back(Flag);
return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
}
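// Illustrative example of the custom i32 case in LowerReturn_64: two inreg
// i32 return values sharing one register come back as (hi << 32) | zext(lo),
// so the pair { 0x1111, 0x2222 } is returned as %i0 == 0x0000111100002222.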
SDValue SparcTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
if (Subtarget->is64Bit())
return LowerFormalArguments_64(Chain, CallConv, IsVarArg, Ins,
DL, DAG, InVals);
return LowerFormalArguments_32(Chain, CallConv, IsVarArg, Ins,
DL, DAG, InVals);
}
/// LowerFormalArguments_32 - V8 uses a very simple ABI, where all values are
/// passed in either one or two GPRs, including FP values. TODO: we should
/// pass FP values in FP registers for fastcc functions.
SDValue SparcTargetLowering::LowerFormalArguments_32(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32);
const unsigned StackOffset = 92;
bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
unsigned InIdx = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) {
CCValAssign &VA = ArgLocs[i];
if (Ins[InIdx].Flags.isSRet()) {
if (InIdx != 0)
report_fatal_error("sparc only supports sret on the first parameter");
// Get SRet from [%fp+64].
int FrameIdx = MF.getFrameInfo().CreateFixedObject(4, 64, true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Arg =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
InVals.push_back(Arg);
continue;
}
if (VA.isRegLoc()) {
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
Register VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi);
SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
assert(i+1 < e);
CCValAssign &NextVA = ArgLocs[++i];
SDValue LoVal;
if (NextVA.isMemLoc()) {
int FrameIdx = MF.getFrameInfo().
CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
} else {
Register loReg = MF.addLiveIn(NextVA.getLocReg(),
&SP::IntRegsRegClass);
LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32);
}
if (IsLittleEndian)
std::swap(LoVal, HiVal);
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(VA.getLocReg(), VReg);
SDValue Arg = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
if (VA.getLocVT() == MVT::f32)
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Arg);
else if (VA.getLocVT() != MVT::i32) {
Arg = DAG.getNode(ISD::AssertSext, dl, MVT::i32, Arg,
DAG.getValueType(VA.getLocVT()));
Arg = DAG.getNode(ISD::TRUNCATE, dl, VA.getLocVT(), Arg);
}
InVals.push_back(Arg);
continue;
}
assert(VA.isMemLoc());
unsigned Offset = VA.getLocMemOffset()+StackOffset;
auto PtrVT = getPointerTy(DAG.getDataLayout());
if (VA.needsCustom()) {
assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
int FI = MF.getFrameInfo().CreateFixedObject(8,
Offset,
true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load =
DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo());
InVals.push_back(Load);
continue;
}
int FI = MF.getFrameInfo().CreateFixedObject(4,
Offset,
true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue HiVal =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
int FI2 = MF.getFrameInfo().CreateFixedObject(4,
Offset+4,
true);
SDValue FIPtr2 = DAG.getFrameIndex(FI2, PtrVT);
SDValue LoVal =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr2, MachinePointerInfo());
if (IsLittleEndian)
std::swap(LoVal, HiVal);
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue);
InVals.push_back(WholeValue);
continue;
}
int FI = MF.getFrameInfo().CreateFixedObject(4,
Offset,
true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
    SDValue Load;
if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo());
} else if (VA.getValVT() == MVT::f128) {
report_fatal_error("SPARCv8 does not handle f128 in calls; "
"pass indirectly");
} else {
// We shouldn't see any other value types here.
llvm_unreachable("Unexpected ValVT encountered in frame lowering.");
}
InVals.push_back(Load);
}
if (MF.getFunction().hasStructRetAttr()) {
// Copy the SRet Argument to SRetReturnReg.
SparcMachineFunctionInfo *SFI = MF.getInfo<SparcMachineFunctionInfo>();
Register Reg = SFI->getSRetReturnReg();
if (!Reg) {
Reg = MF.getRegInfo().createVirtualRegister(&SP::IntRegsRegClass);
SFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
static const MCPhysReg ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs);
const MCPhysReg *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
else {
assert(!ArgOffset);
ArgOffset = 68+4*NumAllocated;
}
// Remember the vararg offset for the va_start implementation.
FuncInfo->setVarArgsFrameOffset(ArgOffset);
std::vector<SDValue> OutChains;
for (; CurArgReg != ArgRegEnd; ++CurArgReg) {
Register VReg = RegInfo.createVirtualRegister(&SP::IntRegsRegClass);
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
int FrameIdx = MF.getFrameInfo().CreateFixedObject(4, ArgOffset,
true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(
DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, MachinePointerInfo()));
ArgOffset += 4;
}
if (!OutChains.empty()) {
OutChains.push_back(Chain);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
return Chain;
}
// Lower formal arguments for the 64 bit ABI.
SDValue SparcTargetLowering::LowerFormalArguments_64(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
// Analyze arguments according to CC_Sparc64.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc64);
// The argument array begins at %fp+BIAS+128, after the register save area.
const unsigned ArgArea = 128;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc()) {
// This argument is passed in a register.
// All integer register arguments are promoted by the caller to i64.
// Create a virtual register for the promoted live-in value.
Register VReg = MF.addLiveIn(VA.getLocReg(),
getRegClassFor(VA.getLocVT()));
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
// Get the high bits for i32 struct elements.
if (VA.getValVT() == MVT::i32 && VA.needsCustom())
Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
DAG.getConstant(32, DL, MVT::i32));
// The caller promoted the argument, so insert an Assert?ext SDNode so we
// won't promote the value again in this function.
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
DAG.getValueType(VA.getValVT()));
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
DAG.getValueType(VA.getValVT()));
break;
default:
break;
}
// Truncate the register down to the argument type.
if (VA.isExtInLoc())
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
InVals.push_back(Arg);
continue;
}
// The registers are exhausted. This argument was passed on the stack.
assert(VA.isMemLoc());
// The CC_Sparc64_Full/Half functions compute stack offsets relative to the
// beginning of the arguments area at %fp+BIAS+128.
unsigned Offset = VA.getLocMemOffset() + ArgArea;
unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
// Adjust offset for extended arguments, SPARC is big-endian.
// The caller will have written the full slot with extended bytes, but we
// prefer our own extending loads.
if (VA.isExtInLoc())
Offset += 8 - ValSize;
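    // E.g. an i32 extended to fill an 8-byte slot has ValSize == 4, so the
    // load reads from Offset + 4, where the significant bytes sit on
    // big-endian SPARC.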
int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
InVals.push_back(
DAG.getLoad(VA.getValVT(), DL, Chain,
DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
MachinePointerInfo::getFixedStack(MF, FI)));
}
if (!IsVarArg)
return Chain;
// This function takes variable arguments, some of which may have been passed
// in registers %i0-%i5. Variable floating point arguments are never passed
// in floating point registers. They go on %i0-%i5 or on the stack like
// integer arguments.
//
// The va_start intrinsic needs to know the offset to the first variable
// argument.
unsigned ArgOffset = CCInfo.getNextStackOffset();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
// Skip the 128 bytes of register save area.
FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea +
Subtarget->getStackPointerBias());
// Save the variable arguments that were passed in registers.
// The caller is required to reserve stack space for 6 arguments regardless
// of how many arguments were actually passed.
SmallVector<SDValue, 8> OutChains;
for (; ArgOffset < 6*8; ArgOffset += 8) {
Register VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
int FI = MF.getFrameInfo().CreateFixedObject(8, ArgOffset + ArgArea, true);
auto PtrVT = getPointerTy(MF.getDataLayout());
OutChains.push_back(
DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
MachinePointerInfo::getFixedStack(MF, FI)));
}
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
return Chain;
}
SDValue
SparcTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
if (Subtarget->is64Bit())
return LowerCall_64(CLI, InVals);
return LowerCall_32(CLI, InVals);
}
static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
const CallBase *Call) {
if (Call)
return Call->hasFnAttr(Attribute::ReturnsTwice);
const Function *CalleeFn = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
CalleeFn = dyn_cast<Function>(G->getGlobal());
} else if (ExternalSymbolSDNode *E =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
const Function &Fn = DAG.getMachineFunction().getFunction();
const Module *M = Fn.getParent();
const char *CalleeName = E->getSymbol();
CalleeFn = M->getFunction(CalleeName);
}
if (!CalleeFn)
return false;
return CalleeFn->hasFnAttribute(Attribute::ReturnsTwice);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization.
bool SparcTargetLowering::IsEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF) const {
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
// Do not tail call opt functions with "disable-tail-calls" attribute.
if (Caller.getFnAttribute("disable-tail-calls").getValueAsString() == "true")
return false;
// Do not tail call opt if the stack is used to pass parameters.
if (CCInfo.getNextStackOffset() != 0)
return false;
// Do not tail call opt if either the callee or caller returns
// a struct and the other does not.
if (!Outs.empty() && Caller.hasStructRetAttr() != Outs[0].Flags.isSRet())
return false;
// Byval parameters hand the function a pointer directly into the stack area
// we want to reuse during a tail call.
for (auto &Arg : Outs)
if (Arg.Flags.isByVal())
return false;
return true;
}
// Lower a call for the 32-bit ABI.
SDValue
SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc &dl = CLI.DL;
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_Sparc32);
isTailCall = isTailCall && IsEligibleForTailCallOptimization(
CCInfo, CLI, DAG.getMachineFunction());
// Get the size of the outgoing arguments stack space requirement.
unsigned ArgsSize = CCInfo.getNextStackOffset();
// Keep stack frames 8-byte aligned.
ArgsSize = (ArgsSize+7) & ~7;
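  // E.g. an ArgsSize of 13 becomes (13 + 7) & ~7 == 16.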
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// Create local copies for byval args.
SmallVector<SDValue, 8> ByValArgs;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (!Flags.isByVal())
continue;
SDValue Arg = OutVals[i];
unsigned Size = Flags.getByValSize();
Align Alignment = Flags.getNonZeroByValAlign();
if (Size > 0U) {
int FI = MFI.CreateStackObject(Size, Alignment, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32);
Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Alignment,
false, // isVolatile,
(Size <= 32), // AlwaysInline if size <= 32,
false, // isTailCall
MachinePointerInfo(), MachinePointerInfo());
ByValArgs.push_back(FIPtr);
    } else {
SDValue nullVal;
ByValArgs.push_back(nullVal);
}
}
assert(!isTailCall || ArgsSize == 0);
if (!isTailCall)
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, dl);
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
const unsigned StackOffset = 92;
bool hasStructRetAttr = false;
unsigned SRetArgSize = 0;
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
// Use local copy if it is a byval arg.
if (Flags.isByVal()) {
Arg = ByValArgs[byvalArgIdx++];
if (!Arg) {
continue;
}
}
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full: break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
break;
}
if (Flags.isSRet()) {
assert(VA.needsCustom());
if (isTailCall)
continue;
      // Store the SRet argument at %sp+64.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(64, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
hasStructRetAttr = true;
// sret only allowed on first argument
assert(Outs[realArgIdx].OrigArgIndex == 0);
SRetArgSize =
DAG.getDataLayout().getTypeAllocSize(CLI.getArgs()[0].IndirectType);
continue;
}
if (VA.needsCustom()) {
assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32);
if (VA.isMemLoc()) {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
// if it is double-word aligned, just store.
if (Offset % 8 == 0) {
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
continue;
}
}
if (VA.getLocVT() == MVT::f64) {
        // Move the float value from the float registers into the
        // integer registers.
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Arg))
Arg = bitcastConstantFPToInt(C, dl, DAG);
else
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg);
}
SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Arg,
DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout())));
SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
Arg,
DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout())));
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0));
assert(i+1 != e);
CCValAssign &NextVA = ArgLocs[++i];
if (NextVA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1));
} else {
// Store the second part in stack.
unsigned Offset = NextVA.getLocMemOffset() + StackOffset;
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo()));
}
} else {
unsigned Offset = VA.getLocMemOffset() + StackOffset;
// Store the first part.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Part0, PtrOff, MachinePointerInfo()));
// Store the second part.
PtrOff = DAG.getIntPtrConstant(Offset + 4, dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo()));
}
continue;
}
    // Arguments that can be passed in registers must be kept in the
    // RegsToPass vector.
if (VA.isRegLoc()) {
if (VA.getLocVT() != MVT::f32) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
}
assert(VA.isMemLoc());
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32);
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + StackOffset,
dl);
PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
}
  // Emit all stores, make sure they occur before any copies into physregs.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The InFlag is necessary since all emitted instructions must be
  // stuck together.
SDValue InFlag;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Register Reg = RegsToPass[i].first;
if (!isTailCall)
Reg = toCallerWindow(Reg);
Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
InFlag = Chain.getValue(1);
}
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
// If the callee is a GlobalAddress node (quite common, every direct call is)
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30
: SparcMCExpr::VK_Sparc_WDISP30;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32, TF);
// Returns a chain & a flag for retval copy to use
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
if (hasStructRetAttr)
Ops.push_back(DAG.getTargetConstant(SRetArgSize, dl, MVT::i32));
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Register Reg = RegsToPass[i].first;
if (!isTailCall)
Reg = toCallerWindow(Reg);
Ops.push_back(DAG.getRegister(Reg, RegsToPass[i].second.getValueType()));
}
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice)
? TRI->getRTCallPreservedMask(CallConv)
: TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
if (isTailCall) {
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
return DAG.getNode(SPISD::TAIL_CALL, dl, MVT::Other, Ops);
}
Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, dl, true),
DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
InFlag = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
RVInfo.AnalyzeCallResult(Ins, RetCC_Sparc32);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ assert(RVLocs[i].isRegLoc() && "Can only return in registers!");
if (RVLocs[i].getLocVT() == MVT::v2i32) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32);
SDValue Lo = DAG.getCopyFromReg(
Chain, dl, toCallerWindow(RVLocs[i++].getLocReg()), MVT::i32, InFlag);
Chain = Lo.getValue(1);
InFlag = Lo.getValue(2);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32, Vec, Lo,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getCopyFromReg(
Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32, Vec, Hi,
DAG.getConstant(1, dl, MVT::i32));
InVals.push_back(Vec);
} else {
Chain =
DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
RVLocs[i].getValVT(), InFlag)
.getValue(1);
InFlag = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
}
return Chain;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register SparcTargetLowering::getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const {
Register Reg = StringSwitch<Register>(RegName)
.Case("i0", SP::I0).Case("i1", SP::I1).Case("i2", SP::I2).Case("i3", SP::I3)
.Case("i4", SP::I4).Case("i5", SP::I5).Case("i6", SP::I6).Case("i7", SP::I7)
.Case("o0", SP::O0).Case("o1", SP::O1).Case("o2", SP::O2).Case("o3", SP::O3)
.Case("o4", SP::O4).Case("o5", SP::O5).Case("o6", SP::O6).Case("o7", SP::O7)
.Case("l0", SP::L0).Case("l1", SP::L1).Case("l2", SP::L2).Case("l3", SP::L3)
.Case("l4", SP::L4).Case("l5", SP::L5).Case("l6", SP::L6).Case("l7", SP::L7)
.Case("g0", SP::G0).Case("g1", SP::G1).Case("g2", SP::G2).Case("g3", SP::G3)
.Case("g4", SP::G4).Case("g5", SP::G5).Case("g6", SP::G6).Case("g7", SP::G7)
.Default(0);
if (Reg)
return Reg;
report_fatal_error("Invalid register name global variable");
}
// Fixup floating point arguments in the ... part of a varargs call.
//
// The SPARC v9 ABI requires that floating point arguments are treated the same
// as integers when calling a varargs function. This does not apply to the
// fixed arguments that are part of the function's prototype.
//
// This function post-processes a CCValAssign array created by
// AnalyzeCallOperands().
static void fixupVariableFloatArgs(SmallVectorImpl<CCValAssign> &ArgLocs,
ArrayRef<ISD::OutputArg> Outs) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
const CCValAssign &VA = ArgLocs[i];
MVT ValTy = VA.getLocVT();
// FIXME: What about f32 arguments? C promotes them to f64 when calling
// varargs functions.
if (!VA.isRegLoc() || (ValTy != MVT::f64 && ValTy != MVT::f128))
continue;
// The fixed arguments to a varargs function still go in FP registers.
if (Outs[VA.getValNo()].IsFixed)
continue;
// This floating point argument should be reassigned.
CCValAssign NewVA;
// Determine the offset into the argument array.
Register firstReg = (ValTy == MVT::f64) ? SP::D0 : SP::Q0;
unsigned argSize = (ValTy == MVT::f64) ? 8 : 16;
unsigned Offset = argSize * (VA.getLocReg() - firstReg);
assert(Offset < 16*8 && "Offset out of range, bad register enum?");
if (Offset < 6*8) {
// This argument should go in %i0-%i5.
unsigned IReg = SP::I0 + Offset/8;
if (ValTy == MVT::f64)
// Full register, just bitconvert into i64.
NewVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(),
IReg, MVT::i64, CCValAssign::BCvt);
else {
assert(ValTy == MVT::f128 && "Unexpected type!");
// Full register, just bitconvert into i128 -- We will lower this into
// two i64s in LowerCall_64.
NewVA = CCValAssign::getCustomReg(VA.getValNo(), VA.getValVT(),
IReg, MVT::i128, CCValAssign::BCvt);
}
} else {
// This needs to go to memory; we're out of integer registers.
NewVA = CCValAssign::getMem(VA.getValNo(), VA.getValVT(),
Offset, VA.getLocVT(), VA.getLocInfo());
}
ArgLocs[i] = NewVA;
}
}
// Lower a call for the 64-bit ABI.
SDValue
SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
SDLoc DL = CLI.DL;
SDValue Chain = CLI.Chain;
auto PtrVT = getPointerTy(DAG.getDataLayout());
// Sparc target does not yet support tail call optimization.
CLI.IsTailCall = false;
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(CLI.Outs, CC_Sparc64);
// Get the size of the outgoing arguments stack space requirement.
// The stack offset computed by CC_Sparc64 includes all arguments.
// Called functions expect 6 argument words to exist in the stack frame, used
// or not.
unsigned ArgsSize = std::max(6*8u, CCInfo.getNextStackOffset());
// Keep stack frames 16-byte aligned.
ArgsSize = alignTo(ArgsSize, 16);
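// E.g. a call passing two i64 arguments still reserves the full
// max(48, 16) = 48 bytes, while one passing eight i64s reserves
// alignTo(64, 16) = 64 bytes.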
// Varargs calls require special treatment.
if (CLI.IsVarArg)
fixupVariableFloatArgs(ArgLocs, CLI.Outs);
// Adjust the stack pointer to make room for the arguments.
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
// with more than 6 arguments.
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
// Collect the set of registers to pass to the function and their values.
// This will be emitted as a sequence of CopyToReg nodes glued to the call
// instruction.
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
// Collect chains from all the memory operations that copy arguments to the
// stack. They must follow the stack pointer adjustment above and precede the
// call instruction itself.
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
const CCValAssign &VA = ArgLocs[i];
SDValue Arg = CLI.OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unknown location info!");
case CCValAssign::Full:
break;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::BCvt:
// fixupVariableFloatArgs() may create bitcasts from f128 to i128. But
// SPARC does not support i128 natively. Lower it into two i64, see below.
if (!VA.needsCustom() || VA.getValVT() != MVT::f128
|| VA.getLocVT() != MVT::i128)
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
}
if (VA.isRegLoc()) {
if (VA.needsCustom() && VA.getValVT() == MVT::f128
&& VA.getLocVT() == MVT::i128) {
// Store and reload into the integer register reg and reg+1.
unsigned Offset = 8 * (VA.getLocReg() - SP::I0);
unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128;
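// E.g. an f128 bound to %i2/%i3 has Offset = 16, so it is spilled to
// %sp+BIAS+144 and its two i64 halves are reloaded into the caller-window
// registers %o2 and %o3 below.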
SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
SDValue HiPtrOff = DAG.getIntPtrConstant(StackOffset, DL);
HiPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, HiPtrOff);
SDValue LoPtrOff = DAG.getIntPtrConstant(StackOffset + 8, DL);
LoPtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, LoPtrOff);
// Store to %sp+BIAS+128+Offset
SDValue Store =
DAG.getStore(Chain, DL, Arg, HiPtrOff, MachinePointerInfo());
// Load into Reg and Reg+1
SDValue Hi64 =
DAG.getLoad(MVT::i64, DL, Store, HiPtrOff, MachinePointerInfo());
SDValue Lo64 =
DAG.getLoad(MVT::i64, DL, Store, LoPtrOff, MachinePointerInfo());
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()),
Hi64));
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()+1),
Lo64));
continue;
}
// The custom bit on an i32 argument indicates that it should be
// passed in the high bits of the register.
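// E.g. for 'inreg {i32, i32}', the first i32 is shifted into bits 63..32
// and the second is zero-extended and OR'ed into bits 31..0 of the same
// 64-bit register.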
if (VA.getValVT() == MVT::i32 && VA.needsCustom()) {
Arg = DAG.getNode(ISD::SHL, DL, MVT::i64, Arg,
DAG.getConstant(32, DL, MVT::i32));
// The next value may go in the low bits of the same register.
// Handle both at once.
if (i+1 < ArgLocs.size() && ArgLocs[i+1].isRegLoc() &&
ArgLocs[i+1].getLocReg() == VA.getLocReg()) {
SDValue NV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
CLI.OutVals[i+1]);
Arg = DAG.getNode(ISD::OR, DL, MVT::i64, Arg, NV);
// Skip the next value, it's already done.
++i;
}
}
RegsToPass.push_back(std::make_pair(toCallerWindow(VA.getLocReg()), Arg));
continue;
}
assert(VA.isMemLoc());
// Create a store off the stack pointer for this argument.
SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
// The argument area starts at %fp+BIAS+128 in the callee frame,
// %sp+BIAS+128 in ours.
SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() +
Subtarget->getStackPointerBias() +
128, DL);
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
MemOpChains.push_back(
DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
}
// Emit all stores, make sure they occur before the call.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of CopyToReg nodes glued together with token chain and
// glue operands which copy the outgoing args into registers. The InGlue is
// necessary since all emitted instructions must be stuck together in order
// to pass the live physical registers.
SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, DL,
RegsToPass[i].first, RegsToPass[i].second, InGlue);
InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is),
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
SDValue Callee = CLI.Callee;
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
unsigned TF = isPositionIndependent() ? SparcMCExpr::VK_Sparc_WPLT30
: SparcMCExpr::VK_Sparc_WDISP30;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT, 0, TF);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, TF);
// Build the operands for the call instruction itself.
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
Ops.push_back(Callee);
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Add a register mask operand representing the call-preserved registers.
const SparcRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *Mask =
((hasReturnsTwice) ? TRI->getRTCallPreservedMask(CLI.CallConv)
: TRI->getCallPreservedMask(DAG.getMachineFunction(),
CLI.CallConv));
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
// Make sure the CopyToReg nodes are glued to the call instruction which
// consumes the registers.
if (InGlue.getNode())
Ops.push_back(InGlue);
// Now the call itself.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops);
InGlue = Chain.getValue(1);
// Revert the stack pointer immediately after the call.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
InGlue = Chain.getValue(1);
// Now extract the return values. This is more or less the same as
// LowerFormalArguments_64.
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Set inreg flag manually for codegen generated library calls that
// return float.
if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
CLI.Ins[0].Flags.setInReg();
RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_Sparc64);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
unsigned Reg = toCallerWindow(VA.getLocReg());
// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
// reside in the same register in the high and low bits. Reuse the
// CopyFromReg previous node to avoid duplicate copies.
SDValue RV;
if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
RV = Chain.getValue(0);
// But usually we'll create a new CopyFromReg for a different register.
if (!RV.getNode()) {
RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
Chain = RV.getValue(1);
InGlue = Chain.getValue(2);
}
// Get the high bits for i32 struct elements.
if (VA.getValVT() == MVT::i32 && VA.needsCustom())
RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
DAG.getConstant(32, DL, MVT::i32));
// The callee promoted the return value, so insert an Assert?ext SDNode so
// we won't promote the value again in this function.
switch (VA.getLocInfo()) {
case CCValAssign::SExt:
RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
DAG.getValueType(VA.getValVT()));
break;
case CCValAssign::ZExt:
RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
DAG.getValueType(VA.getValVT()));
break;
default:
break;
}
// Truncate the register down to the return value type.
if (VA.isExtInLoc())
RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
InVals.push_back(RV);
}
return Chain;
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
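// A 32-bit atomicrmw xchg can be selected directly to SPARC's swap
// instruction; every other read-modify-write operation (and any other
// width) is expanded to a compare-and-swap loop.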
TargetLowering::AtomicExpansionKind
SparcTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
if (AI->getOperation() == AtomicRMWInst::Xchg &&
AI->getType()->getPrimitiveSizeInBits() == 32)
return AtomicExpansionKind::None; // Uses the swap instruction
return AtomicExpansionKind::CmpXChg;
}
/// IntCondCCodeToICC - Convert a DAG integer condition code to a SPARC ICC
/// condition.
static SPCC::CondCodes IntCondCCodeToICC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown integer condition code!");
case ISD::SETEQ: return SPCC::ICC_E;
case ISD::SETNE: return SPCC::ICC_NE;
case ISD::SETLT: return SPCC::ICC_L;
case ISD::SETGT: return SPCC::ICC_G;
case ISD::SETLE: return SPCC::ICC_LE;
case ISD::SETGE: return SPCC::ICC_GE;
case ISD::SETULT: return SPCC::ICC_CS;
case ISD::SETULE: return SPCC::ICC_LEU;
case ISD::SETUGT: return SPCC::ICC_GU;
case ISD::SETUGE: return SPCC::ICC_CC;
}
}
/// FPCondCCodeToFCC - Convert a DAG floating point condition code to a SPARC
/// FCC condition.
static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) {
switch (CC) {
default: llvm_unreachable("Unknown fp condition code!");
case ISD::SETEQ:
case ISD::SETOEQ: return SPCC::FCC_E;
case ISD::SETNE:
case ISD::SETUNE: return SPCC::FCC_NE;
case ISD::SETLT:
case ISD::SETOLT: return SPCC::FCC_L;
case ISD::SETGT:
case ISD::SETOGT: return SPCC::FCC_G;
case ISD::SETLE:
case ISD::SETOLE: return SPCC::FCC_LE;
case ISD::SETGE:
case ISD::SETOGE: return SPCC::FCC_GE;
case ISD::SETULT: return SPCC::FCC_UL;
case ISD::SETULE: return SPCC::FCC_ULE;
case ISD::SETUGT: return SPCC::FCC_UG;
case ISD::SETUGE: return SPCC::FCC_UGE;
case ISD::SETUO: return SPCC::FCC_U;
case ISD::SETO: return SPCC::FCC_O;
case ISD::SETONE: return SPCC::FCC_LG;
case ISD::SETUEQ: return SPCC::FCC_UE;
}
}
SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
const SparcSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Instructions which use registers as conditionals examine all the
// bits (as does the pseudo SELECT_CC expansion). I don't think it
// matters much whether it's ZeroOrOneBooleanContent, or
// ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
// former.
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
// Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass);
if (!Subtarget->useSoftFloat()) {
addRegisterClass(MVT::f32, &SP::FPRegsRegClass);
addRegisterClass(MVT::f64, &SP::DFPRegsRegClass);
addRegisterClass(MVT::f128, &SP::QFPRegsRegClass);
}
if (Subtarget->is64Bit()) {
addRegisterClass(MVT::i64, &SP::I64RegsRegClass);
} else {
// On 32-bit sparc, we also define a register class for pairs of 32-bit
// registers. This is modeled in LLVM as a 2-vector of i32.
addRegisterClass(MVT::v2i32, &SP::IntPairRegClass);
// ...but almost all operations must be expanded, so set that as
// the default.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
setOperationAction(Op, MVT::v2i32, Expand);
}
// Truncating/extending stores/loads are also not supported.
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand);
setTruncStoreAction(VT, MVT::v2i32, Expand);
setTruncStoreAction(MVT::v2i32, VT, Expand);
}
// However, load and store *are* legal.
setOperationAction(ISD::LOAD, MVT::v2i32, Legal);
setOperationAction(ISD::STORE, MVT::v2i32, Legal);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal);
// And we need to promote i64 loads/stores into vector load/store
setOperationAction(ISD::LOAD, MVT::i64, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
// Sadly, this doesn't work:
// AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
// AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
}
// Turn FP extload into load/fpextend
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
}
// Sparc doesn't have i1 sign extending load
for (MVT VT : MVT::integer_valuetypes())
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setTruncStoreAction(MVT::f128, MVT::f32, Expand);
setTruncStoreAction(MVT::f128, MVT::f64, Expand);
// Custom legalize GlobalAddress nodes into LO/HI parts.
setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
setOperationAction(ISD::ConstantPool, PtrVT, Custom);
setOperationAction(ISD::BlockAddress, PtrVT, Custom);
// Sparc doesn't have sext_inreg, replace them with shl/sra
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
// Sparc has no REM or DIVREM operations.
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
// ... nor does SparcV9.
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UREM, MVT::i64, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
}
// Custom expand fp<->sint
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
// Custom Expand fp<->uint
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
// Lower f16 conversion operations into library calls
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
setOperationAction(ISD::BITCAST, MVT::i32, Expand);
// Sparc has no select or setcc: expand to SELECT_CC.
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f128, Expand);
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
setOperationAction(ISD::SETCC, MVT::f128, Expand);
// Sparc doesn't have BRCOND either, it has BR_CC.
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BRIND, MVT::Other, Expand);
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f128, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
setOperationAction(ISD::ADDC, MVT::i32, Custom);
setOperationAction(ISD::ADDE, MVT::i32, Custom);
setOperationAction(ISD::SUBC, MVT::i32, Custom);
setOperationAction(ISD::SUBE, MVT::i32, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ADDC, MVT::i64, Custom);
setOperationAction(ISD::ADDE, MVT::i64, Custom);
setOperationAction(ISD::SUBC, MVT::i64, Custom);
setOperationAction(ISD::SUBE, MVT::i64, Custom);
setOperationAction(ISD::BITCAST, MVT::f64, Expand);
setOperationAction(ISD::BITCAST, MVT::i64, Expand);
setOperationAction(ISD::SELECT, MVT::i64, Expand);
setOperationAction(ISD::SETCC, MVT::i64, Expand);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::CTPOP, MVT::i64,
Subtarget->usePopc() ? Legal : Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
setOperationAction(ISD::ROTL , MVT::i64, Expand);
setOperationAction(ISD::ROTR , MVT::i64, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
}
// ATOMICs.
// Atomics are supported on SparcV9. 32-bit atomics are also
// supported by some Leon SparcV8 variants. Otherwise, atomics
// are unsupported.
if (Subtarget->isV9())
setMaxAtomicSizeInBitsSupported(64);
else if (Subtarget->hasLeonCasa())
setMaxAtomicSizeInBitsSupported(32);
else
setMaxAtomicSizeInBitsSupported(0);
setMinCmpXchgSizeInBits(32);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Legal);
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Legal);
// Custom Lower Atomic LOAD/STORE
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Legal);
setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Legal);
setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom);
}
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
setLibcallName(RTLIB::MULO_I128, nullptr);
if (!Subtarget->isV9()) {
// SparcV8 does not have FNEGD and FABSD.
setOperationAction(ISD::FNEG, MVT::f64, Custom);
setOperationAction(ISD::FABS, MVT::f64, Custom);
}
setOperationAction(ISD::FSIN , MVT::f128, Expand);
setOperationAction(ISD::FCOS , MVT::f128, Expand);
setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
setOperationAction(ISD::FREM , MVT::f128, Expand);
setOperationAction(ISD::FMA , MVT::f128, Expand);
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f128, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
// Expands to [SU]MUL_LOHI.
setOperationAction(ISD::MULHU, MVT::i32, Expand);
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::MUL, MVT::i32, Expand);
if (Subtarget->useSoftMulDiv()) {
// .umul works for both signed and unsigned
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setLibcallName(RTLIB::MUL_I32, ".umul");
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setLibcallName(RTLIB::SDIV_I32, ".div");
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setLibcallName(RTLIB::UDIV_I32, ".udiv");
setLibcallName(RTLIB::SREM_I32, ".rem");
setLibcallName(RTLIB::UREM_I32, ".urem");
}
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i64, Expand);
setOperationAction(ISD::MULHS, MVT::i64, Expand);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
}
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART , MVT::Other, Custom);
// VAARG needs to be lowered to not do unaligned accesses for doubles.
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::TRAP , MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP , MVT::Other, Legal);
// Use the default implementation.
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
setStackPointerRegisterToSaveRestore(SP::O6);
setOperationAction(ISD::CTPOP, MVT::i32,
Subtarget->usePopc() ? Legal : Expand);
if (Subtarget->isV9() && Subtarget->hasHardQuad()) {
setOperationAction(ISD::LOAD, MVT::f128, Legal);
setOperationAction(ISD::STORE, MVT::f128, Legal);
} else {
setOperationAction(ISD::LOAD, MVT::f128, Custom);
setOperationAction(ISD::STORE, MVT::f128, Custom);
}
if (Subtarget->hasHardQuad()) {
setOperationAction(ISD::FADD, MVT::f128, Legal);
setOperationAction(ISD::FSUB, MVT::f128, Legal);
setOperationAction(ISD::FMUL, MVT::f128, Legal);
setOperationAction(ISD::FDIV, MVT::f128, Legal);
setOperationAction(ISD::FSQRT, MVT::f128, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
if (Subtarget->isV9()) {
setOperationAction(ISD::FNEG, MVT::f128, Legal);
setOperationAction(ISD::FABS, MVT::f128, Legal);
} else {
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FABS, MVT::f128, Custom);
}
if (!Subtarget->is64Bit()) {
setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
}
} else {
// Custom legalize f128 operations.
setOperationAction(ISD::FADD, MVT::f128, Custom);
setOperationAction(ISD::FSUB, MVT::f128, Custom);
setOperationAction(ISD::FMUL, MVT::f128, Custom);
setOperationAction(ISD::FDIV, MVT::f128, Custom);
setOperationAction(ISD::FSQRT, MVT::f128, Custom);
setOperationAction(ISD::FNEG, MVT::f128, Custom);
setOperationAction(ISD::FABS, MVT::f128, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
// Set up runtime library names.
if (Subtarget->is64Bit() && !Subtarget->useSoftFloat()) {
setLibcallName(RTLIB::ADD_F128, "_Qp_add");
setLibcallName(RTLIB::SUB_F128, "_Qp_sub");
setLibcallName(RTLIB::MUL_F128, "_Qp_mul");
setLibcallName(RTLIB::DIV_F128, "_Qp_div");
setLibcallName(RTLIB::SQRT_F128, "_Qp_sqrt");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Qp_qtoi");
setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Qp_qtoui");
setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Qp_itoq");
setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Qp_uitoq");
setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Qp_qtox");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Qp_qtoux");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Qp_xtoq");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Qp_uxtoq");
setLibcallName(RTLIB::FPEXT_F32_F128, "_Qp_stoq");
setLibcallName(RTLIB::FPEXT_F64_F128, "_Qp_dtoq");
setLibcallName(RTLIB::FPROUND_F128_F32, "_Qp_qtos");
setLibcallName(RTLIB::FPROUND_F128_F64, "_Qp_qtod");
} else if (!Subtarget->useSoftFloat()) {
setLibcallName(RTLIB::ADD_F128, "_Q_add");
setLibcallName(RTLIB::SUB_F128, "_Q_sub");
setLibcallName(RTLIB::MUL_F128, "_Q_mul");
setLibcallName(RTLIB::DIV_F128, "_Q_div");
setLibcallName(RTLIB::SQRT_F128, "_Q_sqrt");
setLibcallName(RTLIB::FPTOSINT_F128_I32, "_Q_qtoi");
setLibcallName(RTLIB::FPTOUINT_F128_I32, "_Q_qtou");
setLibcallName(RTLIB::SINTTOFP_I32_F128, "_Q_itoq");
setLibcallName(RTLIB::UINTTOFP_I32_F128, "_Q_utoq");
setLibcallName(RTLIB::FPTOSINT_F128_I64, "_Q_qtoll");
setLibcallName(RTLIB::FPTOUINT_F128_I64, "_Q_qtoull");
setLibcallName(RTLIB::SINTTOFP_I64_F128, "_Q_lltoq");
setLibcallName(RTLIB::UINTTOFP_I64_F128, "_Q_ulltoq");
setLibcallName(RTLIB::FPEXT_F32_F128, "_Q_stoq");
setLibcallName(RTLIB::FPEXT_F64_F128, "_Q_dtoq");
setLibcallName(RTLIB::FPROUND_F128_F32, "_Q_qtos");
setLibcallName(RTLIB::FPROUND_F128_F64, "_Q_qtod");
}
}
if (Subtarget->fixAllFDIVSQRT()) {
// Promote FDIVS and FSQRTS to FDIVD and FSQRTD instructions instead, as
// the former instructions trigger errata on LEON processors.
setOperationAction(ISD::FDIV, MVT::f32, Promote);
setOperationAction(ISD::FSQRT, MVT::f32, Promote);
}
if (Subtarget->hasNoFMULS()) {
setOperationAction(ISD::FMUL, MVT::f32, Promote);
}
// Custom combine bitcast between f64 and v2i32
if (!Subtarget->is64Bit())
setTargetDAGCombine(ISD::BITCAST);
if (Subtarget->hasLeonCycleCounter())
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setMinFunctionAlignment(Align(4));
computeRegisterProperties(Subtarget->getRegisterInfo());
}
bool SparcTargetLowering::useSoftFloat() const {
return Subtarget->useSoftFloat();
}
const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((SPISD::NodeType)Opcode) {
case SPISD::FIRST_NUMBER: break;
case SPISD::CMPICC: return "SPISD::CMPICC";
case SPISD::CMPFCC: return "SPISD::CMPFCC";
case SPISD::BRICC: return "SPISD::BRICC";
case SPISD::BRXCC: return "SPISD::BRXCC";
case SPISD::BRFCC: return "SPISD::BRFCC";
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
case SPISD::Hi: return "SPISD::Hi";
case SPISD::Lo: return "SPISD::Lo";
case SPISD::FTOI: return "SPISD::FTOI";
case SPISD::ITOF: return "SPISD::ITOF";
case SPISD::FTOX: return "SPISD::FTOX";
case SPISD::XTOF: return "SPISD::XTOF";
case SPISD::CALL: return "SPISD::CALL";
case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
case SPISD::FLUSHW: return "SPISD::FLUSHW";
case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
case SPISD::TLS_LD: return "SPISD::TLS_LD";
case SPISD::TLS_CALL: return "SPISD::TLS_CALL";
case SPISD::TAIL_CALL: return "SPISD::TAIL_CALL";
case SPISD::LOAD_GDOP: return "SPISD::LOAD_GDOP";
}
return nullptr;
}
EVT SparcTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
/// computeKnownBitsForTargetNode - Determine which bits of Op are known to
/// be zero or one. Op is expected to be a target specific node. Used by DAG
/// combiner.
void SparcTargetLowering::computeKnownBitsForTargetNode
(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth) const {
KnownBits Known2;
Known.resetAll();
switch (Op.getOpcode()) {
default: break;
case SPISD::SELECT_ICC:
case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
Known = KnownBits::commonBits(Known, Known2);
break;
}
}
// Look at LHS/RHS/CC and see if they are a lowered setcc instruction. If so,
// set LHS/RHS to the LHS/RHS of the setcc and SPCC to the condition.
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
if (isNullConstant(RHS) &&
CC == ISD::SETNE &&
(((LHS.getOpcode() == SPISD::SELECT_ICC ||
LHS.getOpcode() == SPISD::SELECT_XCC) &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
(LHS.getOpcode() == SPISD::SELECT_FCC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
isOneConstant(LHS.getOperand(0)) &&
isNullConstant(LHS.getOperand(1))) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
RHS = CMPCC.getOperand(1);
}
}
// Convert to a target node and set target flags.
SDValue SparcTargetLowering::withTargetFlags(SDValue Op, unsigned TF,
SelectionDAG &DAG) const {
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
return DAG.getTargetGlobalAddress(GA->getGlobal(),
SDLoc(GA),
GA->getValueType(0),
GA->getOffset(), TF);
if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
CP->getAlign(), CP->getOffset(), TF);
if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
return DAG.getTargetBlockAddress(BA->getBlockAddress(),
Op.getValueType(),
0,
TF);
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
return DAG.getTargetExternalSymbol(ES->getSymbol(),
ES->getValueType(0), TF);
llvm_unreachable("Unhandled address SDNode");
}
// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
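// For the common %hi/%lo pair this is the classic SPARC address idiom,
// roughly:
//   sethi %hi(sym), %reg
//   or    %reg, %lo(sym), %reg
// expressed here as SPISD::Hi, SPISD::Lo and a generic ADD.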
SDValue SparcTargetLowering::makeHiLoPair(SDValue Op,
unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Hi = DAG.getNode(SPISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
SDValue Lo = DAG.getNode(SPISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = getPointerTy(DAG.getDataLayout());
// Handle PIC mode first. SPARC needs a GOT load for every variable!
if (isPositionIndependent()) {
const Module *M = DAG.getMachineFunction().getFunction().getParent();
PICLevel::Level picLevel = M->getPICLevel();
SDValue Idx;
if (picLevel == PICLevel::SmallPIC) {
// This is the pic13 code model, the GOT is known to be smaller than 8KiB.
Idx = DAG.getNode(SPISD::Lo, DL, Op.getValueType(),
withTargetFlags(Op, SparcMCExpr::VK_Sparc_GOT13, DAG));
} else {
// This is the pic32 code model, the GOT is known to be smaller than 4GB.
Idx = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_GOT22,
SparcMCExpr::VK_Sparc_GOT10, DAG);
}
SDValue GlobalBase = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, VT);
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Idx);
// GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
// function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
// This is one of the absolute code models.
switch(getTargetMachine().getCodeModel()) {
default:
llvm_unreachable("Unsupported absolute code model");
case CodeModel::Small:
// abs32.
return makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HI,
SparcMCExpr::VK_Sparc_LO, DAG);
case CodeModel::Medium: {
// abs44.
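// The three relocations select bits [43:22] (%h44), [21:12] (%m44) and
// [11:0] (%l44) of the symbol's 44-bit address; the shift by 12 moves
// the sethi/or pair into its final position.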
SDValue H44 = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_H44,
SparcMCExpr::VK_Sparc_M44, DAG);
H44 = DAG.getNode(ISD::SHL, DL, VT, H44, DAG.getConstant(12, DL, MVT::i32));
SDValue L44 = withTargetFlags(Op, SparcMCExpr::VK_Sparc_L44, DAG);
L44 = DAG.getNode(SPISD::Lo, DL, VT, L44);
return DAG.getNode(ISD::ADD, DL, VT, H44, L44);
}
case CodeModel::Large: {
// abs64.
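// %hh/%hm form bits [63:32] of the address (shifted into place below),
// and an ordinary %hi/%lo pair supplies bits [31:0].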
SDValue Hi = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HH,
SparcMCExpr::VK_Sparc_HM, DAG);
Hi = DAG.getNode(ISD::SHL, DL, VT, Hi, DAG.getConstant(32, DL, MVT::i32));
SDValue Lo = makeHiLoPair(Op, SparcMCExpr::VK_Sparc_HI,
SparcMCExpr::VK_Sparc_LO, DAG);
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
}
}
SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
}
SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().useEmulatedTLS())
return LowerToTLSEmulatedModel(GA, DAG);
SDLoc DL(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
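// For the dynamic models we build the GOT-relative argument from a
// %tgd_hi22/%tgd_lo10 (or %tldm_*) pair and emit a call to
// __tls_get_addr carrying the matching %tgd_call/%tldm_call annotation.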
if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) {
unsigned HiTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_HI22
: SparcMCExpr::VK_Sparc_TLS_LDM_HI22);
unsigned LoTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_LO10
: SparcMCExpr::VK_Sparc_TLS_LDM_LO10);
unsigned addTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_ADD
: SparcMCExpr::VK_Sparc_TLS_LDM_ADD);
unsigned callTF = ((model == TLSModel::GeneralDynamic)
? SparcMCExpr::VK_Sparc_TLS_GD_CALL
: SparcMCExpr::VK_Sparc_TLS_LDM_CALL);
SDValue HiLo = makeHiLoPair(Op, HiTF, LoTF, DAG);
SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
SDValue Argument = DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Base, HiLo,
withTargetFlags(Op, addTF, DAG));
SDValue Chain = DAG.getEntryNode();
SDValue InFlag;
Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL);
Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag);
InFlag = Chain.getValue(1);
SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
SDValue Symbol = withTargetFlags(Op, callTF, DAG);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
DAG.getMachineFunction(), CallingConv::C);
assert(Mask && "Missing call preserved mask for calling convention");
SDValue Ops[] = {Chain,
Callee,
Symbol,
DAG.getRegister(SP::O0, PtrVT),
DAG.getRegisterMask(Mask),
InFlag};
Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, DL, true),
DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
InFlag = Chain.getValue(1);
SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag);
if (model != TLSModel::LocalDynamic)
return Ret;
SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LDO_HIX22, DAG));
SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LDO_LOX10, DAG));
HiLo = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT, Ret, HiLo,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LDO_ADD, DAG));
}
if (model == TLSModel::InitialExec) {
unsigned ldTF = ((PtrVT == MVT::i64)? SparcMCExpr::VK_Sparc_TLS_IE_LDX
: SparcMCExpr::VK_Sparc_TLS_IE_LD);
SDValue Base = DAG.getNode(SPISD::GLOBAL_BASE_REG, DL, PtrVT);
// GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
// function has calls.
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setHasCalls(true);
SDValue TGA = makeHiLoPair(Op,
SparcMCExpr::VK_Sparc_TLS_IE_HI22,
SparcMCExpr::VK_Sparc_TLS_IE_LO10, DAG);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base, TGA);
SDValue Offset = DAG.getNode(SPISD::TLS_LD,
DL, PtrVT, Ptr,
withTargetFlags(Op, ldTF, DAG));
return DAG.getNode(SPISD::TLS_ADD, DL, PtrVT,
DAG.getRegister(SP::G7, PtrVT), Offset,
withTargetFlags(Op,
SparcMCExpr::VK_Sparc_TLS_IE_ADD, DAG));
}
assert(model == TLSModel::LocalExec);
SDValue Hi = DAG.getNode(SPISD::Hi, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LE_HIX22, DAG));
SDValue Lo = DAG.getNode(SPISD::Lo, DL, PtrVT,
withTargetFlags(Op, SparcMCExpr::VK_Sparc_TLS_LE_LOX10, DAG));
SDValue Offset = DAG.getNode(ISD::XOR, DL, PtrVT, Hi, Lo);
return DAG.getNode(ISD::ADD, DL, PtrVT,
DAG.getRegister(SP::G7, PtrVT), Offset);
}
SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain,
ArgListTy &Args, SDValue Arg,
const SDLoc &DL,
SelectionDAG &DAG) const {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
ArgListEntry Entry;
Entry.Node = Arg;
Entry.Ty = ArgTy;
if (ArgTy->isFP128Ty()) {
// Create a stack object and pass the pointer to the library function.
int FI = MFI.CreateStackObject(16, Align(8), false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Entry.Node, FIPtr, MachinePointerInfo(),
Align(8));
Entry.Node = FIPtr;
Entry.Ty = PointerType::getUnqual(ArgTy);
}
Args.push_back(Entry);
return Chain;
}
SDValue
SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
const char *LibFuncName,
unsigned numArgs) const {
ArgListTy Args;
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT);
Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
Type *RetTyABI = RetTy;
SDValue Chain = DAG.getEntryNode();
SDValue RetPtr;
if (RetTy->isFP128Ty()) {
// Create a Stack Object to receive the return value of type f128.
ArgListEntry Entry;
int RetFI = MFI.CreateStackObject(16, Align(8), false);
RetPtr = DAG.getFrameIndex(RetFI, PtrVT);
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
if (!Subtarget->is64Bit()) {
Entry.IsSRet = true;
Entry.IndirectType = RetTy;
}
Entry.IsReturned = false;
Args.push_back(Entry);
RetTyABI = Type::getVoidTy(*DAG.getContext());
}
assert(Op->getNumOperands() >= numArgs && "Not enough operands!");
for (unsigned i = 0, e = numArgs; i != e; ++i) {
Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG);
}
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(SDLoc(Op)).setChain(Chain)
.setCallee(CallingConv::C, RetTyABI, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
// chain is in second result.
if (RetTyABI == RetTy)
return CallInfo.first;
assert(RetTy->isFP128Ty() && "Unexpected return type!");
Chain = CallInfo.second;
// Load RetPtr to get the return value.
return DAG.getLoad(Op.getValueType(), SDLoc(Op), Chain, RetPtr,
MachinePointerInfo(), Align(8));
}
SDValue SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
unsigned &SPCC, const SDLoc &DL,
SelectionDAG &DAG) const {
const char *LibCall = nullptr;
bool is64Bit = Subtarget->is64Bit();
switch(SPCC) {
default: llvm_unreachable("Unhandled conditional code!");
case SPCC::FCC_E : LibCall = is64Bit? "_Qp_feq" : "_Q_feq"; break;
case SPCC::FCC_NE : LibCall = is64Bit? "_Qp_fne" : "_Q_fne"; break;
case SPCC::FCC_L : LibCall = is64Bit? "_Qp_flt" : "_Q_flt"; break;
case SPCC::FCC_G : LibCall = is64Bit? "_Qp_fgt" : "_Q_fgt"; break;
case SPCC::FCC_LE : LibCall = is64Bit? "_Qp_fle" : "_Q_fle"; break;
case SPCC::FCC_GE : LibCall = is64Bit? "_Qp_fge" : "_Q_fge"; break;
case SPCC::FCC_UL :
case SPCC::FCC_ULE:
case SPCC::FCC_UG :
case SPCC::FCC_UGE:
case SPCC::FCC_U :
case SPCC::FCC_O :
case SPCC::FCC_LG :
case SPCC::FCC_UE : LibCall = is64Bit? "_Qp_cmp" : "_Q_cmp"; break;
}
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getExternalSymbol(LibCall, PtrVT);
Type *RetTy = Type::getInt32Ty(*DAG.getContext());
ArgListTy Args;
SDValue Chain = DAG.getEntryNode();
Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG);
Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain)
.setCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
// result is in first, and chain is in second result.
SDValue Result = CallInfo.first;
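// The comparison libcall returns a small integer; judging from the tests
// below, the encoding is 0 = equal, 1 = less, 2 = greater, 3 = unordered,
// and each unordered-aware condition is re-derived from it with an
// integer compare.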
switch(SPCC) {
default: {
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UL : {
SDValue Mask = DAG.getConstant(1, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_ULE: {
SDValue RHS = DAG.getConstant(2, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UG : {
SDValue RHS = DAG.getConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_G;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UGE: {
SDValue RHS = DAG.getConstant(1, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_U : {
SDValue RHS = DAG.getConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_O : {
SDValue RHS = DAG.getConstant(3, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_LG : {
SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_NE;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
case SPCC::FCC_UE : {
SDValue Mask = DAG.getConstant(3, DL, Result.getValueType());
Result = DAG.getNode(ISD::AND, DL, Result.getValueType(), Result, Mask);
SDValue RHS = DAG.getConstant(0, DL, Result.getValueType());
SPCC = SPCC::ICC_E;
return DAG.getNode(SPISD::CMPICC, DL, MVT::Glue, Result, RHS);
}
}
}
static SDValue
LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
if (Op.getOperand(0).getValueType() == MVT::f64)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPEXT_F64_F128), 1);
if (Op.getOperand(0).getValueType() == MVT::f32)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1);
llvm_unreachable("fpextend with non-float operand!");
return SDValue();
}
static SDValue
LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
// FP_ROUND on f64 and f32 are legal.
if (Op.getOperand(0).getValueType() != MVT::f128)
return Op;
if (Op.getValueType() == MVT::f64)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPROUND_F128_F64), 1);
if (Op.getValueType() == MVT::f32)
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1);
llvm_unreachable("fpround to non-float!");
return SDValue();
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT VT = Op.getValueType();
assert(VT == MVT::i32 || VT == MVT::i64);
// Expand f128 operations to fp128 ABI calls.
if (Op.getOperand(0).getValueType() == MVT::f128
&& (!hasHardQuad || !TLI.isTypeLegal(VT))) {
const char *libName = TLI.getLibcallName(VT == MVT::i32
? RTLIB::FPTOSINT_F128_I32
: RTLIB::FPTOSINT_F128_I64);
return TLI.LowerF128Op(Op, DAG, libName, 1);
}
// Expand if the resulting type is illegal.
if (!TLI.isTypeLegal(VT))
return SDValue();
// Otherwise, convert the fp value to an integer held in an FP register.
if (VT == MVT::i32)
Op = DAG.getNode(SPISD::FTOI, dl, MVT::f32, Op.getOperand(0));
else
Op = DAG.getNode(SPISD::FTOX, dl, MVT::f64, Op.getOperand(0));
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
assert(OpVT == MVT::i32 || OpVT == MVT::i64);
EVT floatVT = (OpVT == MVT::i32) ? MVT::f32 : MVT::f64;
// Expand f128 operations to fp128 ABI calls.
if (Op.getValueType() == MVT::f128
&& (!hasHardQuad || !TLI.isTypeLegal(OpVT))) {
const char *libName = TLI.getLibcallName(OpVT == MVT::i32
? RTLIB::SINTTOFP_I32_F128
: RTLIB::SINTTOFP_I64_F128);
return TLI.LowerF128Op(Op, DAG, libName, 1);
}
// Expand if the operand type is illegal.
if (!TLI.isTypeLegal(OpVT))
return SDValue();
// Otherwise, convert the int value to FP in an FP register.
SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));
unsigned opcode = (OpVT == MVT::i32)? SPISD::ITOF : SPISD::XTOF;
return DAG.getNode(opcode, dl, Op.getValueType(), Tmp);
}
static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT VT = Op.getValueType();
// Expand if it does not involve f128 or the target has support for
// quad floating point instructions and the resulting type is legal.
if (Op.getOperand(0).getValueType() != MVT::f128 ||
(hasHardQuad && TLI.isTypeLegal(VT)))
return SDValue();
assert(VT == MVT::i32 || VT == MVT::i64);
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(VT == MVT::i32
? RTLIB::FPTOUINT_F128_I32
: RTLIB::FPTOUINT_F128_I64),
1);
}
static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDLoc dl(Op);
EVT OpVT = Op.getOperand(0).getValueType();
assert(OpVT == MVT::i32 || OpVT == MVT::i64);
// Expand if it does not involve f128 or the target has support for
// quad floating point instructions and the operand type is legal.
if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
return SDValue();
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(OpVT == MVT::i32
? RTLIB::UINTTOFP_I32_F128
: RTLIB::UINTTOFP_I64_F128),
1);
}
static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
SDValue RHS = Op.getOperand(3);
SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
unsigned Opc, SPCC = ~0U;
// If this is a br_cc of a "setcc", and if the setcc got lowered into
// an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
LookThroughSetCC(LHS, RHS, CC, SPCC);
// Get the condition flag.
SDValue CompareFlag;
if (LHS.getValueType().isInteger()) {
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
// 32-bit compares use the icc flags, 64-bit uses the xcc flags.
Opc = LHS.getValueType() == MVT::i32 ? SPISD::BRICC : SPISD::BRXCC;
} else {
if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
Opc = SPISD::BRICC;
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
Opc = SPISD::BRFCC;
}
}
return DAG.getNode(Opc, dl, MVT::Other, Chain, Dest,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
bool hasHardQuad) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue TrueVal = Op.getOperand(2);
SDValue FalseVal = Op.getOperand(3);
SDLoc dl(Op);
unsigned Opc, SPCC = ~0U;
// If this is a select_cc of a "setcc", and if the setcc got lowered into
// an CMP[IF]CC/SELECT_[IF]CC pair, find the original compared values.
LookThroughSetCC(LHS, RHS, CC, SPCC);
SDValue CompareFlag;
if (LHS.getValueType().isInteger()) {
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
Opc = LHS.getValueType() == MVT::i32 ?
SPISD::SELECT_ICC : SPISD::SELECT_XCC;
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
} else {
if (!hasHardQuad && LHS.getValueType() == MVT::f128) {
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
CompareFlag = TLI.LowerF128Compare(LHS, RHS, SPCC, dl, DAG);
Opc = SPISD::SELECT_ICC;
} else {
CompareFlag = DAG.getNode(SPISD::CMPFCC, dl, MVT::Glue, LHS, RHS);
Opc = SPISD::SELECT_FCC;
if (SPCC == ~0U) SPCC = FPCondCCodeToFCC(CC);
}
}
return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
DAG.getConstant(SPCC, dl, MVT::i32), CompareFlag);
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI) {
MachineFunction &MF = DAG.getMachineFunction();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// Need frame address to find the address of VarArgsFrameIndex.
MF.getFrameInfo().setFrameAddressIsTaken(true);
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
SDLoc DL(Op);
SDValue Offset =
DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(SP::I6, PtrVT),
DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
MachinePointerInfo(SV));
}
static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
SDNode *Node = Op.getNode();
EVT VT = Node->getValueType(0);
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
EVT PtrVT = VAListPtr.getValueType();
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
SDLoc DL(Node);
SDValue VAList =
DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
// Increment the pointer, VAList, to the next vaarg.
SDValue NextPtr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getIntPtrConstant(VT.getSizeInBits()/8,
DL));
// Store the incremented VAList to the legalized pointer.
InChain = DAG.getStore(VAList.getValue(1), DL, NextPtr, VAListPtr,
MachinePointerInfo(SV));
// Load the actual argument out of the pointer VAList.
// We can't count on greater alignment than the word size.
return DAG.getLoad(
VT, DL, InChain, VAList, MachinePointerInfo(),
std::min(PtrVT.getFixedSizeInBits(), VT.getFixedSizeInBits()) / 8);
}
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
MaybeAlign Alignment =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
Align StackAlign = Subtarget->getFrameLowering()->getStackAlign();
EVT VT = Size->getValueType(0);
SDLoc dl(Op);
// TODO: implement over-aligned alloca. (Note: this also implies
// supporting over-aligned function frames + dynamic allocations,
// which currently isn't supported at all.)
if (Alignment && *Alignment > StackAlign) {
const MachineFunction &MF = DAG.getMachineFunction();
report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
"over-aligned dynamic alloca not supported.");
}
// The resultant pointer needs to be above the register spill area
// at the bottom of the stack.
unsigned regSpillArea;
if (Subtarget->is64Bit()) {
regSpillArea = 128;
} else {
// On Sparc32, the size of the spill area is 92. Unfortunately,
// that's only 4-byte aligned, not 8-byte aligned (the stack
// pointer is 8-byte aligned). So, if the user asked for an 8-byte
// aligned dynamic allocation, we actually need to add 96 to the
// bottom of the stack, instead of 92, to ensure 8-byte alignment.
// That also means adding 4 to the size of the allocation --
// before applying the 8-byte rounding. Unfortunately, the value we
// get here has already had rounding applied. So, we need to add 8
// instead, wasting a bit more memory.
// Further, this only actually needs to be done if the required
// alignment is > 4, but, we've lost that info by this point, too,
// so we always apply it.
// (An alternative approach would be to always reserve 96 bytes
// instead of the required 92, but then we'd waste 4 extra bytes
// in every frame, not just those with dynamic stack allocations)
// TODO: modify code in SelectionDAGBuilder to make this less sad.
Size = DAG.getNode(ISD::ADD, dl, VT, Size,
DAG.getConstant(8, dl, VT));
regSpillArea = 96;
}
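// E.g. on 32-bit, an 8-byte alloca becomes: %sp -= (8 + 8); result =
// new %sp + 96, leaving the (rounded-up) register spill area below the
// returned pointer.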
unsigned SPReg = SP::O6;
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
regSpillArea += Subtarget->getStackPointerBias();
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
DAG.getConstant(regSpillArea, dl, VT));
SDValue Ops[2] = { NewVal, Chain };
return DAG.getMergeValues(Ops, dl);
}
static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
SDValue Chain = DAG.getNode(SPISD::FLUSHW,
dl, MVT::Other, DAG.getEntryNode());
return Chain;
}
static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget,
bool AlwaysFlush = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
unsigned FrameReg = SP::I6;
unsigned stackBias = Subtarget->getStackPointerBias();
SDValue FrameAddr;
SDValue Chain;
// Flush first to make sure the windowed registers' values are on the stack.
Chain = (depth || AlwaysFlush) ? getFLUSHW(Op, DAG) : DAG.getEntryNode();
FrameAddr = DAG.getCopyFromReg(Chain, dl, FrameReg, VT);
unsigned Offset = (Subtarget->is64Bit()) ? (stackBias + 112) : 56;
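// %fp of the next frame up is the saved %i6, which is register 14 of the
// window save area: 14 * 4 = 56 bytes on 32-bit, 14 * 8 = 112 (plus the
// stack bias) on 64-bit.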
while (depth--) {
SDValue Ptr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
DAG.getIntPtrConstant(Offset, dl));
FrameAddr = DAG.getLoad(VT, dl, Chain, Ptr, MachinePointerInfo());
}
if (Subtarget->is64Bit())
FrameAddr = DAG.getNode(ISD::ADD, dl, VT, FrameAddr,
DAG.getIntPtrConstant(stackBias, dl));
return FrameAddr;
}
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
uint64_t depth = Op.getConstantOperandVal(0);
return getFRAMEADDR(depth, Op, DAG, Subtarget);
}
static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
const SparcSubtarget *Subtarget) {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
EVT VT = Op.getValueType();
SDLoc dl(Op);
uint64_t depth = Op.getConstantOperandVal(0);
SDValue RetAddr;
if (depth == 0) {
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
Register RetReg = MF.addLiveIn(SP::I7, TLI.getRegClassFor(PtrVT));
RetAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, RetReg, VT);
return RetAddr;
}
// Need frame address to find return address of the caller.
SDValue FrameAddr = getFRAMEADDR(depth - 1, Op, DAG, Subtarget, true);
unsigned Offset = (Subtarget->is64Bit()) ? 120 : 60;
SDValue Ptr = DAG.getNode(ISD::ADD,
dl, VT,
FrameAddr,
DAG.getIntPtrConstant(Offset, dl));
RetAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), Ptr, MachinePointerInfo());
return RetAddr;
}
static SDValue LowerF64Op(SDValue SrcReg64, const SDLoc &dl, SelectionDAG &DAG,
unsigned opcode) {
assert(SrcReg64.getValueType() == MVT::f64 && "LowerF64Op called on non-double!");
assert(opcode == ISD::FNEG || opcode == ISD::FABS);
// Lower fneg/fabs on f64 to fneg/fabs on f32.
// fneg f64 => fneg f32:sub_even, fmov f32:sub_odd.
// fabs f64 => fabs f32:sub_even, fmov f32:sub_odd.
// Note: in little-endian, the two halves of the floating-point
// value are stored in the registers in the opposite order, so the
// subreg with the sign bit is the highest-numbered (odd), rather
// than the lowest-numbered (even).
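// For illustration: with %d0 aliasing the pair {%f0, %f1}, a
// big-endian target applies the operation to %f0 (sub_even) and
// copies %f1 unchanged, while a little-endian target applies it to
// %f1 (sub_odd) instead.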
SDValue Hi32 = DAG.getTargetExtractSubreg(SP::sub_even, dl, MVT::f32,
SrcReg64);
SDValue Lo32 = DAG.getTargetExtractSubreg(SP::sub_odd, dl, MVT::f32,
SrcReg64);
if (DAG.getDataLayout().isLittleEndian())
Lo32 = DAG.getNode(opcode, dl, MVT::f32, Lo32);
else
Hi32 = DAG.getNode(opcode, dl, MVT::f32, Hi32);
SDValue DstReg64 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f64), 0);
DstReg64 = DAG.getTargetInsertSubreg(SP::sub_even, dl, MVT::f64,
DstReg64, Hi32);
DstReg64 = DAG.getTargetInsertSubreg(SP::sub_odd, dl, MVT::f64,
DstReg64, Lo32);
return DstReg64;
}
// Lower an f128 load into two f64 loads.
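// For example, a 16-byte load from %ptr becomes an f64 load of the
// high half at %ptr (sub_even64) and an f64 load of the low half at
// %ptr + 8 (sub_odd64); the halves are reassembled with
// INSERT_SUBREG and the two load chains merged with a TokenFactor.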
static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
{
SDLoc dl(Op);
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
assert(LdNode->getOffset().isUndef() && "Unexpected node type");
Align Alignment = commonAlignment(LdNode->getOriginalAlign(), 8);
SDValue Hi64 =
DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LdNode->getBasePtr(),
LdNode->getPointerInfo(), Alignment);
EVT addrVT = LdNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
LdNode->getBasePtr(),
DAG.getConstant(8, dl, addrVT));
SDValue Lo64 = DAG.getLoad(MVT::f64, dl, LdNode->getChain(), LoPtr,
LdNode->getPointerInfo().getWithOffset(8),
Alignment);
SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
SDNode *InFP128 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f128);
InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
MVT::f128,
SDValue(InFP128, 0),
Hi64,
SubRegEven);
InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
MVT::f128,
SDValue(InFP128, 0),
Lo64,
SubRegOdd);
SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1),
SDValue(Lo64.getNode(), 1) };
SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
SDValue Ops[2] = {SDValue(InFP128,0), OutChain};
return DAG.getMergeValues(Ops, dl);
}
static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG)
{
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
EVT MemVT = LdNode->getMemoryVT();
if (MemVT == MVT::f128)
return LowerF128Load(Op, DAG);
return Op;
}
// Lower an f128 store into two f64 stores.
static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
assert(StNode->getOffset().isUndef() && "Unexpected node type");
SDValue SubRegEven = DAG.getTargetConstant(SP::sub_even64, dl, MVT::i32);
SDValue SubRegOdd = DAG.getTargetConstant(SP::sub_odd64, dl, MVT::i32);
SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl,
MVT::f64,
StNode->getValue(),
SubRegEven);
SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl,
MVT::f64,
StNode->getValue(),
SubRegOdd);
Align Alignment = commonAlignment(StNode->getOriginalAlign(), 8);
SDValue OutChains[2];
OutChains[0] =
DAG.getStore(StNode->getChain(), dl, SDValue(Hi64, 0),
StNode->getBasePtr(), StNode->getPointerInfo(),
Alignment);
EVT addrVT = StNode->getBasePtr().getValueType();
SDValue LoPtr = DAG.getNode(ISD::ADD, dl, addrVT,
StNode->getBasePtr(),
DAG.getConstant(8, dl, addrVT));
OutChains[1] = DAG.getStore(StNode->getChain(), dl, SDValue(Lo64, 0), LoPtr,
StNode->getPointerInfo().getWithOffset(8),
Alignment);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
{
SDLoc dl(Op);
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
EVT MemVT = St->getMemoryVT();
if (MemVT == MVT::f128)
return LowerF128Store(Op, DAG);
if (MemVT == MVT::i64) {
// Custom handling for i64 stores: turn the store into a bitcast and
// a v2i32 store.
SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
SDValue Chain = DAG.getStore(
St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
St->getOriginalAlign(), St->getMemOperand()->getFlags(),
St->getAAInfo());
return Chain;
}
return SDValue();
}
static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS)
&& "invalid opcode");
SDLoc dl(Op);
if (Op.getValueType() == MVT::f64)
return LowerF64Op(Op.getOperand(0), dl, DAG, Op.getOpcode());
if (Op.getValueType() != MVT::f128)
return Op;
// Lower fabs/fneg on f128 to fabs/fneg on f64
// fabs/fneg f128 => fabs/fneg f64:sub_even64, fmov f64:sub_odd64
// (As with LowerF64Op, on little-endian, we need to negate the odd
// subreg)
SDValue SrcReg128 = Op.getOperand(0);
SDValue Hi64 = DAG.getTargetExtractSubreg(SP::sub_even64, dl, MVT::f64,
SrcReg128);
SDValue Lo64 = DAG.getTargetExtractSubreg(SP::sub_odd64, dl, MVT::f64,
SrcReg128);
if (DAG.getDataLayout().isLittleEndian()) {
if (isV9)
Lo64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Lo64);
else
Lo64 = LowerF64Op(Lo64, dl, DAG, Op.getOpcode());
} else {
if (isV9)
Hi64 = DAG.getNode(Op.getOpcode(), dl, MVT::f64, Hi64);
else
Hi64 = LowerF64Op(Hi64, dl, DAG, Op.getOpcode());
}
SDValue DstReg128 = SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, MVT::f128), 0);
DstReg128 = DAG.getTargetInsertSubreg(SP::sub_even64, dl, MVT::f128,
DstReg128, Hi64);
DstReg128 = DAG.getTargetInsertSubreg(SP::sub_odd64, dl, MVT::f128,
DstReg128, Lo64);
return DstReg128;
}
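// Lower 64-bit carrying arithmetic to 32-bit halves chained through the
// carry flag. Sketching the intent for an i64 ADDC of a and b: the low
// words are combined first (Lo = ADDC(a.lo, b.lo)), the resulting carry
// feeds the high words (Hi = ADDE(a.hi, b.hi, carry)), and the i64
// result is rebuilt as (zext(Hi) << 32) | zext(Lo), with Hi's carry
// produced as the second result.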
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
if (Op.getValueType() != MVT::i64)
return Op;
SDLoc dl(Op);
SDValue Src1 = Op.getOperand(0);
SDValue Src1Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1);
SDValue Src1Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src1,
DAG.getConstant(32, dl, MVT::i64));
Src1Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src1Hi);
SDValue Src2 = Op.getOperand(1);
SDValue Src2Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2);
SDValue Src2Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Src2,
DAG.getConstant(32, dl, MVT::i64));
Src2Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src2Hi);
bool hasChain = false;
unsigned hiOpc = Op.getOpcode();
switch (Op.getOpcode()) {
default: llvm_unreachable("Invalid opcode");
case ISD::ADDC: hiOpc = ISD::ADDE; break;
case ISD::ADDE: hasChain = true; break;
case ISD::SUBC: hiOpc = ISD::SUBE; break;
case ISD::SUBE: hasChain = true; break;
}
SDValue Lo;
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Glue);
if (hasChain) {
Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo,
Op.getOperand(2));
} else {
Lo = DAG.getNode(Op.getOpcode(), dl, VTs, Src1Lo, Src2Lo);
}
SDValue Hi = DAG.getNode(hiOpc, dl, VTs, Src1Hi, Src2Hi, Lo.getValue(1));
SDValue Carry = Hi.getValue(1);
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Lo);
Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Hi);
Hi = DAG.getNode(ISD::SHL, dl, MVT::i64, Hi,
DAG.getConstant(32, dl, MVT::i64));
SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo);
SDValue Ops[2] = { Dst, Carry };
return DAG.getMergeValues(Ops, dl);
}
// Custom lower UMULO/SMULO for SPARC. This code is similar to ExpandNode()
// in LegalizeDAG.cpp, except for the order of arguments to the library
// function.
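// The product is computed with a 128-bit multiply libcall whose i128
// operands are passed high half first (SPARC being big-endian), hence
// the (HiLHS, LHS, HiRHS, RHS) argument list below. Overflow is then
// derived from the top half of the product: for UMULO it must be zero,
// and for SMULO it must equal the sign-extension of the bottom half
// (BottomHalf >> 63).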
static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI)
{
unsigned opcode = Op.getOpcode();
assert((opcode == ISD::UMULO || opcode == ISD::SMULO) && "Invalid Opcode.");
bool isSigned = (opcode == ISD::SMULO);
EVT VT = MVT::i64;
EVT WideVT = MVT::i128;
SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
if (LHS.getValueType() != VT)
return Op;
SDValue ShiftAmt = DAG.getConstant(63, dl, VT);
SDValue RHS = Op.getOperand(1);
SDValue HiLHS, HiRHS;
if (isSigned) {
HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
} else {
HiLHS = DAG.getConstant(0, dl, VT);
HiRHS = DAG.getConstant(0, dl, MVT::i64);
}
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SDValue MulResult = TLI.makeLibCall(DAG,
RTLIB::MUL_I128, WideVT,
Args, CallOptions, dl).first;
SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(0, dl));
SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
MulResult, DAG.getIntPtrConstant(1, dl));
if (isSigned) {
SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE);
} else {
TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, DAG.getConstant(0, dl, VT),
ISD::SETNE);
}
// MulResult is a node with an illegal type. Because such things are not
// generally permitted during this phase of legalization, ensure that
// nothing is left using the node. The above EXTRACT_ELEMENT nodes should have
// been folded.
assert(MulResult->use_empty() && "Illegally typed node still in use!");
SDValue Ops[2] = { BottomHalf, TopHalf } ;
return DAG.getMergeValues(Ops, dl);
}
static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering())) {
// Expand with a fence.
return SDValue();
}
// Monotonic load/stores are legal.
return Op;
}
SDValue SparcTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDLoc dl(Op);
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::thread_pointer: {
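// In the SPARC ABI, %g7 is reserved as the thread pointer.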
EVT PtrVT = getPointerTy(DAG.getDataLayout());
return DAG.getRegister(SP::G7, PtrVT);
}
}
}
SDValue SparcTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
bool hasHardQuad = Subtarget->hasHardQuad();
bool isV9 = Subtarget->isV9();
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG, *this,
Subtarget);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG,
Subtarget);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG, *this,
hasHardQuad);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG, *this,
hasHardQuad);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG, *this,
hasHardQuad);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
hasHardQuad);
case ISD::BR_CC: return LowerBR_CC(Op, DAG, *this,
hasHardQuad);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, *this,
hasHardQuad);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
case ISD::VAARG: return LowerVAARG(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG,
Subtarget);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::FADD: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::ADD_F128), 2);
case ISD::FSUB: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::SUB_F128), 2);
case ISD::FMUL: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::MUL_F128), 2);
case ISD::FDIV: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::DIV_F128), 2);
case ISD::FSQRT: return LowerF128Op(Op, DAG,
getLibcallName(RTLIB::SQRT_F128),1);
case ISD::FABS:
case ISD::FNEG: return LowerFNEGorFABS(Op, DAG, isV9);
case ISD::FP_EXTEND: return LowerF128_FPEXTEND(Op, DAG, *this);
case ISD::FP_ROUND: return LowerF128_FPROUND(Op, DAG, *this);
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::UMULO:
case ISD::SMULO: return LowerUMULO_SMULO(Op, DAG, *this);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerATOMIC_LOAD_STORE(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
}
}
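// Materialize an f64 constant as a v2i32 build_vector of its 32-bit
// halves. As a quick illustration: double 1.0 bitcasts to
// 0x3FF0000000000000, giving Hi = 0x3FF00000 and Lo = 0x00000000; the
// halves are swapped on little-endian targets.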
SDValue SparcTargetLowering::bitcastConstantFPToInt(ConstantFPSDNode *C,
const SDLoc &DL,
SelectionDAG &DAG) const {
APInt V = C->getValueAPF().bitcastToAPInt();
SDValue Lo = DAG.getConstant(V.zextOrTrunc(32), DL, MVT::i32);
SDValue Hi = DAG.getConstant(V.lshr(32).zextOrTrunc(32), DL, MVT::i32);
if (DAG.getDataLayout().isLittleEndian())
std::swap(Lo, Hi);
return DAG.getBuildVector(MVT::v2i32, DL, {Hi, Lo});
}
SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SDLoc dl(N);
SDValue Src = N->getOperand(0);
if (isa<ConstantFPSDNode>(Src) && N->getSimpleValueType(0) == MVT::v2i32 &&
Src.getSimpleValueType() == MVT::f64)
return bitcastConstantFPToInt(cast<ConstantFPSDNode>(Src), dl, DCI.DAG);
return SDValue();
}
SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default:
break;
case ISD::BITCAST:
return PerformBITCASTCombine(N, DCI);
}
return SDValue();
}
MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
default: llvm_unreachable("Unknown SELECT_CC!");
case SP::SELECT_CC_Int_ICC:
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
case SP::SELECT_CC_QFP_ICC:
return expandSelectCC(MI, BB, SP::BCOND);
case SP::SELECT_CC_Int_XCC:
case SP::SELECT_CC_FP_XCC:
case SP::SELECT_CC_DFP_XCC:
case SP::SELECT_CC_QFP_XCC:
return expandSelectCC(MI, BB, SP::BPXCC);
case SP::SELECT_CC_Int_FCC:
case SP::SELECT_CC_FP_FCC:
case SP::SELECT_CC_DFP_FCC:
case SP::SELECT_CC_QFP_FCC:
return expandSelectCC(MI, BB, SP::FBCOND);
}
}
MachineBasicBlock *
SparcTargetLowering::expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const {
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
unsigned CC = (SPCC::CondCodes)MI.getOperand(3).getImm();
// To "insert" a SELECT_CC instruction, we actually have to insert the
// triangle control-flow pattern. The incoming instruction knows the
// destination vreg to set, the condition code register to branch on, the
// true/false values to select between, and the condition code for the branch.
//
// We produce the following control flow:
// ThisMBB
// | \
// | IfFalseMBB
// | /
// SinkMBB
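// ThisMBB ends with a conditional branch (BROpcode) straight to SinkMBB
// when the condition holds, so the phi below selects TrueValue from
// ThisMBB and FalseValue from the IfFalseMBB fall-through path.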
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator It = ++BB->getIterator();
MachineBasicBlock *ThisMBB = BB;
MachineFunction *F = BB->getParent();
MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, IfFalseMBB);
F->insert(It, SinkMBB);
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
SinkMBB->splice(SinkMBB->begin(), ThisMBB,
std::next(MachineBasicBlock::iterator(MI)), ThisMBB->end());
SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
// Set the new successors for ThisMBB.
ThisMBB->addSuccessor(IfFalseMBB);
ThisMBB->addSuccessor(SinkMBB);
BuildMI(ThisMBB, dl, TII.get(BROpcode))
.addMBB(SinkMBB)
.addImm(CC);
// IfFalseMBB just falls through to SinkMBB.
IfFalseMBB->addSuccessor(SinkMBB);
// %Result = phi [ %TrueValue, ThisMBB ], [ %FalseValue, IfFalseMBB ]
BuildMI(*SinkMBB, SinkMBB->begin(), dl, TII.get(SP::PHI),
MI.getOperand(0).getReg())
.addReg(MI.getOperand(1).getReg())
.addMBB(ThisMBB)
.addReg(MI.getOperand(2).getReg())
.addMBB(IfFalseMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return SinkMBB;
}
//===----------------------------------------------------------------------===//
// Sparc Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SparcTargetLowering::ConstraintType
SparcTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
case 'r':
case 'f':
case 'e':
return C_RegisterClass;
case 'I': // SIMM13
return C_Immediate;
}
}
return TargetLowering::getConstraintType(Constraint);
}
TargetLowering::ConstraintWeight SparcTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const {
ConstraintWeight weight = CW_Invalid;
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
if (!CallOperandVal)
return CW_Default;
// Look at the constraint type.
switch (*constraint) {
default:
weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
break;
case 'I': // SIMM13
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (isInt<13>(C->getSExtValue()))
weight = CW_Constant;
}
break;
}
return weight;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void SparcTargetLowering::
LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
return;
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
case 'I':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (isInt<13>(C->getSExtValue())) {
Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
Op.getValueType());
break;
}
return;
}
}
if (Result.getNode()) {
Ops.push_back(Result);
return;
}
TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
std::pair<unsigned, const TargetRegisterClass *>
SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
if (Constraint.empty())
return std::make_pair(0U, nullptr);
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
if (VT == MVT::v2i32)
return std::make_pair(0U, &SP::IntPairRegClass);
else if (Subtarget->is64Bit())
return std::make_pair(0U, &SP::I64RegsRegClass);
else
return std::make_pair(0U, &SP::IntRegsRegClass);
case 'f':
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
else if (VT == MVT::f64 || VT == MVT::i64)
return std::make_pair(0U, &SP::LowDFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::LowQFPRegsRegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
case 'e':
if (VT == MVT::f32 || VT == MVT::i32)
return std::make_pair(0U, &SP::FPRegsRegClass);
else if (VT == MVT::f64 || VT == MVT::i64 )
return std::make_pair(0U, &SP::DFPRegsRegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &SP::QFPRegsRegClass);
// This will generate an error message
return std::make_pair(0U, nullptr);
}
}
if (Constraint.front() != '{')
return std::make_pair(0U, nullptr);
assert(Constraint.back() == '}' && "Not a brace enclosed constraint?");
StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
if (RegName.empty())
return std::make_pair(0U, nullptr);
unsigned long long RegNo;
// Handle numbered register aliases.
if (RegName[0] == 'r' &&
getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) {
// r0-r7 -> g0-g7
// r8-r15 -> o0-o7
// r16-r23 -> l0-l7
// r24-r31 -> i0-i7
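// e.g. the constraint {r10} selects %o2.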
if (RegNo > 31)
return std::make_pair(0U, nullptr);
const char RegTypes[] = {'g', 'o', 'l', 'i'};
char RegType = RegTypes[RegNo / 8];
char RegIndex = '0' + (RegNo % 8);
char Tmp[] = {'{', RegType, RegIndex, '}', 0};
return getRegForInlineAsmConstraint(TRI, Tmp, VT);
}
// Rewrite the fN constraint according to the value type if needed.
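// e.g. an {f2} constraint used with an f64 value is rewritten to {d1},
// and {f4} with an f128 value to {q1}; register numbers that are not
// suitably aligned (odd for f64, not a multiple of 4 for f128) are
// rejected.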
if (VT != MVT::f32 && VT != MVT::Other && RegName[0] == 'f' &&
getAsUnsignedInteger(RegName.begin() + 1, 10, RegNo)) {
if (VT == MVT::f64 && (RegNo % 2 == 0)) {
return getRegForInlineAsmConstraint(
TRI, StringRef("{d" + utostr(RegNo / 2) + "}"), VT);
} else if (VT == MVT::f128 && (RegNo % 4 == 0)) {
return getRegForInlineAsmConstraint(
TRI, StringRef("{q" + utostr(RegNo / 4) + "}"), VT);
} else {
return std::make_pair(0U, nullptr);
}
}
auto ResultPair =
TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
if (!ResultPair.second)
return std::make_pair(0U, nullptr);
// Force the use of I64Regs over IntRegs for 64-bit values.
if (Subtarget->is64Bit() && VT == MVT::i64) {
assert(ResultPair.second == &SP::IntRegsRegClass &&
"Unexpected register class");
return std::make_pair(ResultPair.first, &SP::I64RegsRegClass);
}
return ResultPair;
}
bool
SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Sparc target isn't yet aware of offsets.
return false;
}
void SparcTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const {
SDLoc dl(N);
RTLIB::Libcall libCall = RTLIB::UNKNOWN_LIBCALL;
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// Custom lower only if it involves f128 or i64.
if (N->getOperand(0).getValueType() != MVT::f128
|| N->getValueType(0) != MVT::i64)
return;
libCall = ((N->getOpcode() == ISD::FP_TO_SINT)
? RTLIB::FPTOSINT_F128_I64
: RTLIB::FPTOUINT_F128_I64);
Results.push_back(LowerF128Op(SDValue(N, 0),
DAG,
getLibcallName(libCall),
1));
return;
case ISD::READCYCLECOUNTER: {
assert(Subtarget->hasLeonCycleCounter());
SDValue Lo = DAG.getCopyFromReg(N->getOperand(0), dl, SP::ASR23, MVT::i32);
SDValue Hi = DAG.getCopyFromReg(Lo, dl, SP::G0, MVT::i32);
SDValue Ops[] = { Lo, Hi };
SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops);
Results.push_back(Pair);
Results.push_back(N->getOperand(0));
return;
}
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// Custom lower only if it involves f128 or i64.
if (N->getValueType(0) != MVT::f128
|| N->getOperand(0).getValueType() != MVT::i64)
return;
libCall = ((N->getOpcode() == ISD::SINT_TO_FP)
? RTLIB::SINTTOFP_I64_F128
: RTLIB::UINTTOFP_I64_F128);
Results.push_back(LowerF128Op(SDValue(N, 0),
DAG,
getLibcallName(libCall),
1));
return;
case ISD::LOAD: {
LoadSDNode *Ld = cast<LoadSDNode>(N);
// Custom handling only for i64: turn an i64 load into a v2i32 load
// and a bitcast.
if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64)
return;
SDLoc dl(N);
SDValue LoadRes = DAG.getExtLoad(
Ld->getExtensionType(), dl, MVT::v2i32, Ld->getChain(),
Ld->getBasePtr(), Ld->getPointerInfo(), MVT::v2i32,
Ld->getOriginalAlign(), Ld->getMemOperand()->getFlags(),
Ld->getAAInfo());
SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes);
Results.push_back(Res);
Results.push_back(LoadRes.getValue(1));
return;
}
}
}
// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool SparcTargetLowering::useLoadStackGuardNode() const {
if (!Subtarget->isTargetLinux())
return TargetLowering::useLoadStackGuardNode();
return true;
}
// Override to avoid declaring the stack-guard global variable on Linux,
// where the guard is loaded via LOAD_STACK_GUARD instead.
void SparcTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget->isTargetLinux())
return TargetLowering::insertSSPDeclarations(M);
}
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 2768bb20566a..16e4f2687054 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -1,217 +1,222 @@
//===-- SparcISelLowering.h - Sparc DAG Lowering Interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that Sparc uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
#define LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
#include "Sparc.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
class SparcSubtarget;
namespace SPISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
CMPICC, // Compare two GPR operands, set icc+xcc.
CMPFCC, // Compare two FP operands, set fcc.
BRICC, // Branch to dest on icc condition
BRXCC, // Branch to dest on xcc condition (64-bit only).
BRFCC, // Branch to dest on fcc condition
SELECT_ICC, // Select between two values using the current ICC flags.
SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
Hi, Lo, // Hi/Lo operations, typically on a global address.
FTOI, // FP to Int within a FP register.
ITOF, // Int to FP within a FP register.
FTOX, // FP to Int64 within a FP register.
XTOF, // Int64 to FP within a FP register.
CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
FLUSHW, // FLUSH register windows to stack.
TAIL_CALL, // Tail call
TLS_ADD, // For Thread Local Storage (TLS).
TLS_LD,
TLS_CALL,
LOAD_GDOP, // Load operation w/ gdop relocation.
};
}
class SparcTargetLowering : public TargetLowering {
const SparcSubtarget *Subtarget;
public:
SparcTargetLowering(const TargetMachine &TM, const SparcSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
bool useSoftFloat() const override;
/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
const char *constraint) const override;
void LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
return MVT::i32;
}
Register getRegisterByName(const char* RegName, LLT VT,
const MachineFunction &MF) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
Register
getExceptionPointerRegister(const Constant *PersonalityFn) const override {
return SP::I0;
}
/// If a physical register, this returns the register that receives the
/// exception typeid on entry to a landing pad.
Register
getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
return SP::I1;
}
/// Override to support customized stack guard loading.
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
/// getSetCCResultType - Return the ISD::SETCC ValueType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
SDValue
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerFormalArguments_32(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerFormalArguments_64(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const override;
SDValue LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerReturn_64(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const;
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
SelectionDAG &DAG) const;
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerF128_LibCallArg(SDValue Chain, ArgListTy &Args, SDValue Arg,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerF128Op(SDValue Op, SelectionDAG &DAG,
const char *LibFuncName,
unsigned numArgs) const;
SDValue LowerF128Compare(SDValue LHS, SDValue RHS, unsigned &SPCC,
const SDLoc &DL, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL,
SelectionDAG &DAG) const;
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
bool IsEligibleForTailCallOptimization(CCState &CCInfo,
CallLoweringInfo &CLI,
MachineFunction &MF) const;
bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
return VT != MVT::f128;
}
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
// FIXME: We insert fences for each atomic operation and generate
// sub-optimal code for PSO/TSO. (Approximately nobody uses any
// mode but TSO, which makes this even more silly.)
return true;
}
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
SelectionDAG &DAG) const override;
MachineBasicBlock *expandSelectCC(MachineInstr &MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
};
} // end namespace llvm
#endif // LLVM_LIB_TARGET_SPARC_SPARCISELLOWERING_H
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 6df0409256bb..6fc7b29c5b78 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1,2619 +1,2619 @@
//===- GlobalOpt.cpp - Optimize Global Variables --------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass transforms simple global variables that never have their address
// taken. When this is obviously the case, it marks read/write globals as
// constant, deletes variables that are only stored to, etc.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/GlobalOpt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/Evaluator.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
using namespace llvm;
#define DEBUG_TYPE "globalopt"
STATISTIC(NumMarked , "Number of globals marked constant");
STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
STATISTIC(NumSubstitute,"Number of globals with initializers stored into them");
STATISTIC(NumDeleted , "Number of globals deleted");
STATISTIC(NumGlobUses , "Number of global uses devirtualized");
STATISTIC(NumLocalized , "Number of globals localized");
STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans");
STATISTIC(NumFastCallFns , "Number of functions converted to fastcc");
STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated");
STATISTIC(NumNestRemoved , "Number of nest attributes removed");
STATISTIC(NumAliasesResolved, "Number of global aliases resolved");
STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated");
STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed");
STATISTIC(NumInternalFunc, "Number of internal functions");
STATISTIC(NumColdCC, "Number of functions marked coldcc");
static cl::opt<bool>
EnableColdCCStressTest("enable-coldcc-stress-test",
cl::desc("Enable stress test of coldcc by adding "
"calling conv to all internal functions."),
cl::init(false), cl::Hidden);
static cl::opt<int> ColdCCRelFreq(
"coldcc-rel-freq", cl::Hidden, cl::init(2),
cl::desc(
"Maximum block frequency, expressed as a percentage of caller's "
"entry frequency, for a call site to be considered cold for enabling"
"coldcc"));
/// Is this global variable possibly used by a leak checker as a root? If so,
/// we might not really want to eliminate the stores to it.
static bool isLeakCheckerRoot(GlobalVariable *GV) {
// A global variable is a root if it is a pointer, or could plausibly contain
// a pointer. There are two challenges; one is that we could have a struct
// that has an inner member which is a pointer. We recurse through the type to
// detect these (up to a point). The other is that we may actually be a union
// of a pointer and another type, and so our LLVM type is an integer which
// gets converted into a pointer, or our type is an [i8 x #] with a pointer
// potentially contained here.
if (GV->hasPrivateLinkage())
return false;
SmallVector<Type *, 4> Types;
Types.push_back(GV->getValueType());
unsigned Limit = 20;
do {
Type *Ty = Types.pop_back_val();
switch (Ty->getTypeID()) {
default: break;
case Type::PointerTyID:
return true;
case Type::FixedVectorTyID:
case Type::ScalableVectorTyID:
if (cast<VectorType>(Ty)->getElementType()->isPointerTy())
return true;
break;
case Type::ArrayTyID:
Types.push_back(cast<ArrayType>(Ty)->getElementType());
break;
case Type::StructTyID: {
StructType *STy = cast<StructType>(Ty);
if (STy->isOpaque()) return true;
for (StructType::element_iterator I = STy->element_begin(),
E = STy->element_end(); I != E; ++I) {
Type *InnerTy = *I;
if (isa<PointerType>(InnerTy)) return true;
if (isa<StructType>(InnerTy) || isa<ArrayType>(InnerTy) ||
isa<VectorType>(InnerTy))
Types.push_back(InnerTy);
}
break;
}
}
if (--Limit == 0) return true;
} while (!Types.empty());
return false;
}
/// Given a value that is stored to a global but never read, determine whether
/// it's safe to remove the store and the chain of computation that feeds the
/// store.
static bool IsSafeComputationToRemove(
Value *V, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
do {
if (isa<Constant>(V))
return true;
if (!V->hasOneUse())
return false;
if (isa<LoadInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V) ||
isa<GlobalValue>(V))
return false;
if (isAllocationFn(V, GetTLI))
return true;
Instruction *I = cast<Instruction>(V);
if (I->mayHaveSideEffects())
return false;
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
if (!GEP->hasAllConstantIndices())
return false;
} else if (I->getNumOperands() != 1) {
return false;
}
V = I->getOperand(0);
} while (true);
}
/// This GV is a pointer root. Loop over all users of the global and clean up
/// any that obviously don't assign the global a dynamically allocated value.
static bool
CleanupPointerRootUsers(GlobalVariable *GV,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// A brief explanation of leak checkers. The goal is to find bugs where
// pointers are forgotten, causing an accumulating growth in memory
// usage over time. The common strategy for leak checkers is to explicitly
// allow the memory pointed to by globals at exit. This is popular because it
// also solves another problem where the main thread of a C++ program may shut
// down before other threads that are still expecting to use those globals. To
// handle that case, we expect the program may create a singleton and never
// destroy it.
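// For example, if a store into the global is the only use of a malloc
// result, both the store and the chain of computation producing the
// stored value (including the allocation itself) can be removed.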
bool Changed = false;
// If Dead[n].first is the only use of a malloc result, we can delete its
// chain of computation and the store to the global in Dead[n].second.
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
// Constants can't be pointers to dynamically allocated memory.
for (User *U : llvm::make_early_inc_range(GV->users())) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getValueOperand();
if (isa<Constant>(V)) {
Changed = true;
SI->eraseFromParent();
} else if (Instruction *I = dyn_cast<Instruction>(V)) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, SI));
}
} else if (MemSetInst *MSI = dyn_cast<MemSetInst>(U)) {
if (isa<Constant>(MSI->getValue())) {
Changed = true;
MSI->eraseFromParent();
} else if (Instruction *I = dyn_cast<Instruction>(MSI->getValue())) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, MSI));
}
} else if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(U)) {
GlobalVariable *MemSrc = dyn_cast<GlobalVariable>(MTI->getSource());
if (MemSrc && MemSrc->isConstant()) {
Changed = true;
MTI->eraseFromParent();
} else if (Instruction *I = dyn_cast<Instruction>(MTI->getSource())) {
if (I->hasOneUse())
Dead.push_back(std::make_pair(I, MTI));
}
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->use_empty()) {
CE->destroyConstant();
Changed = true;
}
} else if (Constant *C = dyn_cast<Constant>(U)) {
if (isSafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
Dead.clear();
CleanupPointerRootUsers(GV, GetTLI);
return true;
}
}
}
for (int i = 0, e = Dead.size(); i != e; ++i) {
if (IsSafeComputationToRemove(Dead[i].first, GetTLI)) {
Dead[i].second->eraseFromParent();
Instruction *I = Dead[i].first;
do {
if (isAllocationFn(I, GetTLI))
break;
Instruction *J = dyn_cast<Instruction>(I->getOperand(0));
if (!J)
break;
I->eraseFromParent();
I = J;
} while (true);
I->eraseFromParent();
Changed = true;
}
}
return Changed;
}
/// We just marked GV constant. Loop over all users of the global, cleaning up
/// the obvious ones. This is largely just a quick scan over the use list to
/// clean up the easy and obvious cruft. This returns true if it made a change.
static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
const DataLayout &DL) {
Constant *Init = GV->getInitializer();
SmallVector<User *, 8> WorkList(GV->users());
SmallPtrSet<User *, 8> Visited;
bool Changed = false;
SmallVector<WeakTrackingVH> MaybeDeadInsts;
auto EraseFromParent = [&](Instruction *I) {
for (Value *Op : I->operands())
if (auto *OpI = dyn_cast<Instruction>(Op))
MaybeDeadInsts.push_back(OpI);
I->eraseFromParent();
Changed = true;
};
while (!WorkList.empty()) {
User *U = WorkList.pop_back_val();
if (!Visited.insert(U).second)
continue;
if (auto *BO = dyn_cast<BitCastOperator>(U))
append_range(WorkList, BO->users());
if (auto *ASC = dyn_cast<AddrSpaceCastOperator>(U))
append_range(WorkList, ASC->users());
else if (auto *GEP = dyn_cast<GEPOperator>(U))
append_range(WorkList, GEP->users());
else if (auto *LI = dyn_cast<LoadInst>(U)) {
// A load from a uniform value is always the same, regardless of any
// applied offset.
Type *Ty = LI->getType();
if (Constant *Res = ConstantFoldLoadFromUniformValue(Init, Ty)) {
LI->replaceAllUsesWith(Res);
EraseFromParent(LI);
continue;
}
Value *PtrOp = LI->getPointerOperand();
APInt Offset(DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true);
if (PtrOp == GV) {
if (auto *Value = ConstantFoldLoadFromConst(Init, Ty, Offset, DL)) {
LI->replaceAllUsesWith(Value);
EraseFromParent(LI);
}
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Store must be unreachable or storing Init into the global.
EraseFromParent(SI);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U)) { // memset/cpy/mv
if (getUnderlyingObject(MI->getRawDest()) == GV)
EraseFromParent(MI);
}
}
Changed |=
RecursivelyDeleteTriviallyDeadInstructionsPermissive(MaybeDeadInsts);
GV->removeDeadConstantUsers();
return Changed;
}
/// Look at all uses of the global and determine which (offset, type) pairs it
/// can be split into.
static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
const DataLayout &DL) {
SmallVector<Use *, 16> Worklist;
SmallPtrSet<Use *, 16> Visited;
auto AppendUses = [&](Value *V) {
for (Use &U : V->uses())
if (Visited.insert(&U).second)
Worklist.push_back(&U);
};
AppendUses(GV);
while (!Worklist.empty()) {
Use *U = Worklist.pop_back_val();
User *V = U->getUser();
auto *GEP = dyn_cast<GEPOperator>(V);
if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
(GEP && GEP->hasAllConstantIndices())) {
AppendUses(V);
continue;
}
if (Value *Ptr = getLoadStorePointerOperand(V)) {
// This is storing the global address into somewhere, not storing into
// the global.
if (isa<StoreInst>(V) && U->getOperandNo() == 0)
return false;
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
/* AllowNonInbounds */ true);
if (Ptr != GV || Offset.getActiveBits() >= 64)
return false;
// TODO: We currently require that all accesses at a given offset must
// use the same type. This could be relaxed.
Type *Ty = getLoadStoreType(V);
auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
if (Ty != It->second)
return false;
continue;
}
// Ignore dead constant users.
if (auto *C = dyn_cast<Constant>(V)) {
if (!isSafeToDestroyConstant(C))
return false;
continue;
}
// Unknown user.
return false;
}
return true;
}
/// Copy over the debug info for a variable to its SRA replacements.
static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
uint64_t FragmentOffsetInBits,
uint64_t FragmentSizeInBits,
uint64_t VarSize) {
SmallVector<DIGlobalVariableExpression *, 1> GVs;
GV->getDebugInfo(GVs);
for (auto *GVE : GVs) {
DIVariable *Var = GVE->getVariable();
DIExpression *Expr = GVE->getExpression();
int64_t CurVarOffsetInBytes = 0;
uint64_t CurVarOffsetInBits = 0;
// Calculate the offset in bytes; continue if it is unknown.
if (!Expr->extractIfOffset(CurVarOffsetInBytes))
continue;
// Ignore negative offset.
if (CurVarOffsetInBytes < 0)
continue;
// Convert offset to bits.
CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
// Current var starts after the fragment, ignore.
if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits))
continue;
uint64_t CurVarSize = Var->getType()->getSizeInBits();
// Current variable ends before start of fragment, ignore.
if (CurVarSize != 0 &&
(CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits)
continue;
// Current variable fits in the fragment.
if (CurVarOffsetInBits == FragmentOffsetInBits &&
CurVarSize == FragmentSizeInBits)
Expr = DIExpression::get(Expr->getContext(), {});
// If the FragmentSize is smaller than the variable,
// emit a fragment expression.
else if (FragmentSizeInBits < VarSize) {
if (auto E = DIExpression::createFragmentExpression(
Expr, FragmentOffsetInBits, FragmentSizeInBits))
Expr = *E;
else
return;
}
auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
NGV->addDebugInfo(NGVE);
}
}
/// Perform scalar replacement of aggregates on the specified global variable.
/// This opens the door for other optimizations by exposing the behavior of the
/// program in a more fine-grained way. We have determined that this
/// transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
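/// As an illustrative case: an internal global of type { i32, i64 } whose
/// uses only ever load/store the i32 at offset 0 and the i64 at offset 8
/// is split into two globals, @g.0 (i32) and @g.1 (i64), each taking the
/// matching slice of the original initializer.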
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
assert(GV->hasLocalLinkage());
// Collect types to split into.
DenseMap<uint64_t, Type *> Types;
if (!collectSRATypes(Types, GV, DL) || Types.empty())
return nullptr;
// Make sure we don't SRA back to the same type.
if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
return nullptr;
// Don't perform SRA if we would have to split into many globals.
if (Types.size() > 16)
return nullptr;
// Sort by offset.
SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
append_range(TypesVector, Types);
sort(TypesVector, llvm::less_first());
// Check that the types are non-overlapping.
uint64_t Offset = 0;
for (const auto &Pair : TypesVector) {
// Overlaps with previous type.
if (Pair.first < Offset)
return nullptr;
Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
}
// Some accesses go beyond the end of the global; don't bother.
if (Offset > DL.getTypeAllocSize(GV->getValueType()))
return nullptr;
// Collect initializers for new globals.
Constant *OrigInit = GV->getInitializer();
DenseMap<uint64_t, Constant *> Initializers;
for (const auto &Pair : Types) {
Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
APInt(64, Pair.first), DL);
if (!NewInit) {
LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
<< *GV << " with type " << *Pair.second << " at offset "
<< Pair.first << "\n");
return nullptr;
}
Initializers.insert({Pair.first, NewInit});
}
LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
// Get the alignment of the global, either explicit or target-specific.
Align StartAlignment =
DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
uint64_t VarSize = DL.getTypeSizeInBits(GV->getValueType());
// Create replacement globals.
DenseMap<uint64_t, GlobalVariable *> NewGlobals;
unsigned NameSuffix = 0;
for (auto &Pair : TypesVector) {
uint64_t Offset = Pair.first;
Type *Ty = Pair.second;
GlobalVariable *NGV = new GlobalVariable(
*GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
GV->getThreadLocalMode(), GV->getAddressSpace());
NGV->copyAttributesFrom(GV);
NewGlobals.insert({Offset, NGV});
// Calculate the known alignment of the field. If the original aggregate
// had 256-byte alignment, for example, something might depend on that:
// propagate info to each field.
Align NewAlign = commonAlignment(StartAlignment, Offset);
if (NewAlign > DL.getABITypeAlign(Ty))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
VarSize);
}
// Replace uses of the original global with uses of the new global.
SmallVector<Value *, 16> Worklist;
SmallPtrSet<Value *, 16> Visited;
SmallVector<WeakTrackingVH, 16> DeadInsts;
auto AppendUsers = [&](Value *V) {
for (User *U : V->users())
if (Visited.insert(U).second)
Worklist.push_back(U);
};
AppendUsers(GV);
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
if (isa<BitCastOperator>(V) || isa<AddrSpaceCastOperator>(V) ||
isa<GEPOperator>(V)) {
AppendUsers(V);
if (isa<Instruction>(V))
DeadInsts.push_back(V);
continue;
}
if (Value *Ptr = getLoadStorePointerOperand(V)) {
APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,
/* AllowNonInbounds */ true);
assert(Ptr == GV && "Load/store must be from/to global");
GlobalVariable *NGV = NewGlobals[Offset.getZExtValue()];
assert(NGV && "Must have replacement global for this offset");
// Update the pointer operand and recalculate alignment.
Align PrefAlign = DL.getPrefTypeAlign(getLoadStoreType(V));
Align NewAlign =
getOrEnforceKnownAlignment(NGV, PrefAlign, DL, cast<Instruction>(V));
if (auto *LI = dyn_cast<LoadInst>(V)) {
LI->setOperand(0, NGV);
LI->setAlignment(NewAlign);
} else {
auto *SI = cast<StoreInst>(V);
SI->setOperand(1, NGV);
SI->setAlignment(NewAlign);
}
continue;
}
assert(isa<Constant>(V) && isSafeToDestroyConstant(cast<Constant>(V)) &&
"Other users can only be dead constants");
}
// Delete old instructions and global.
RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
GV->removeDeadConstantUsers();
GV->eraseFromParent();
++NumSRA;
assert(NewGlobals.size() > 0);
return NewGlobals.begin()->second;
}
/// Return true if all users of the specified value will trap if the value is
/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid
/// reprocessing them.
static bool AllUsesOfValueWillTrapIfNull(const Value *V,
SmallPtrSetImpl<const PHINode*> &PHIs) {
for (const User *U : V->users()) {
if (const Instruction *I = dyn_cast<Instruction>(U)) {
// If null pointer is considered valid, then all uses are non-trapping.
// Non address-space 0 globals have already been pruned by the caller.
if (NullPointerIsDefined(I->getFunction()))
return false;
}
if (isa<LoadInst>(U)) {
// Will trap.
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == V) {
return false; // Storing the value.
}
} else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
if (CI->getCalledOperand() != V) {
return false; // Not calling the ptr
}
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(U)) {
if (II->getCalledOperand() != V) {
return false; // Not calling the ptr
}
} else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
// If we've already seen this phi node, ignore it; it has already been
// checked.
if (PHIs.insert(PN).second && !AllUsesOfValueWillTrapIfNull(PN, PHIs))
return false;
} else if (isa<ICmpInst>(U) &&
!ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
isa<LoadInst>(U->getOperand(0)) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
->getPointerOperand()
->stripPointerCasts()) &&
"Should be GlobalVariable");
// This and only this kind of non-signed ICmpInst is to be replaced with
// the comparing of the value of the created global init bool later in
// optimizeGlobalAddressOfAllocation for the global variable.
} else {
return false;
}
}
return true;
}
/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null. Note that this also permits comparisons of the loaded value
/// against null, as a special case.
static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(GV);
while (!Worklist.empty()) {
const Value *P = Worklist.pop_back_val();
for (auto *U : P->users()) {
if (auto *LI = dyn_cast<LoadInst>(U)) {
SmallPtrSet<const PHINode *, 8> PHIs;
if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
return false;
} else if (auto *SI = dyn_cast<StoreInst>(U)) {
// Ignore stores to the global.
if (SI->getPointerOperand() != P)
return false;
} else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->stripPointerCasts() != GV)
return false;
// Check further the ConstantExpr.
Worklist.push_back(CE);
} else {
// We don't know or understand this user, bail out.
return false;
}
}
}
return true;
}
/// Get all the load/store uses of global variable \p GV.
static void allUsesOfLoadAndStores(GlobalVariable *GV,
SmallVector<Value *, 4> &Uses) {
SmallVector<Value *, 4> Worklist;
Worklist.push_back(GV);
while (!Worklist.empty()) {
auto *P = Worklist.pop_back_val();
for (auto *U : P->users()) {
if (auto *CE = dyn_cast<ConstantExpr>(U)) {
Worklist.push_back(CE);
continue;
}
assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
"Expect only load or store instructions");
Uses.push_back(U);
}
}
}
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
// Uses are non-trapping if null pointer is considered valid.
// Non address-space 0 globals are already pruned by the caller.
if (NullPointerIsDefined(I->getFunction()))
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setOperand(0, NewV);
Changed = true;
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
if (SI->getOperand(1) == V) {
SI->setOperand(1, NewV);
Changed = true;
}
} else if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
CallBase *CB = cast<CallBase>(I);
if (CB->getCalledOperand() == V) {
// Calling through the pointer! Turn into a direct call, but be careful
// that the pointer is not also being passed as an argument.
CB->setCalledOperand(NewV);
Changed = true;
bool PassedAsArg = false;
for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
if (CB->getArgOperand(i) == V) {
PassedAsArg = true;
CB->setArgOperand(i, NewV);
}
if (PassedAsArg) {
// Being passed as an argument also. Be careful to not invalidate UI!
UI = V->user_begin();
}
}
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
Changed |= OptimizeAwayTrappingUsesOfValue(CI,
ConstantExpr::getCast(CI->getOpcode(),
NewV, CI->getType()));
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
}
} else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
// Should handle GEP here.
SmallVector<Constant*, 8> Idxs;
Idxs.reserve(GEPI->getNumOperands()-1);
for (User::op_iterator i = GEPI->op_begin() + 1, e = GEPI->op_end();
i != e; ++i)
if (Constant *C = dyn_cast<Constant>(*i))
Idxs.push_back(C);
else
break;
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(
GEPI, ConstantExpr::getGetElementPtr(GEPI->getSourceElementType(),
NewV, Idxs));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
}
}
}
return Changed;
}
/// The specified global has only one non-null value stored into it. If there
/// are uses of the loaded value that would trap if the loaded value is
/// dynamically null, then we know that they cannot be reachable with a null
/// value, so we can optimize away the load.
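///
/// A minimal sketch on hypothetical IR: given
///   @g = internal global i32* null
/// whose only non-null store is "store i32* @buf, i32** @g", a trapping
/// user such as
///   %p = load i32*, i32** @g
///   %v = load i32, i32* %p
/// can be rewritten to use @buf directly, leaving the load of @g dead.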
static bool OptimizeAwayTrappingUsesOfLoads(
GlobalVariable *GV, Constant *LV, const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
// other than the store that defines it.
bool AllNonStoreUsesGone = true;
// Replace all uses of loads with uses of uses of the stored value.
for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the load
if (LI->use_empty()) {
LI->eraseFromParent();
Changed = true;
} else {
AllNonStoreUsesGone = false;
}
} else if (isa<StoreInst>(GlobalUser)) {
// Ignore the store that stores "LV" to the global.
assert(GlobalUser->getOperand(1) == GV &&
"Must be storing *to* the global");
} else {
AllNonStoreUsesGone = false;
// If we get here we could have other crazy uses that are transitively
// loaded.
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
isa<BitCastInst>(GlobalUser) ||
isa<GetElementPtrInst>(GlobalUser)) &&
"Only expect load and stores!");
}
}
if (Changed) {
LLVM_DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV
<< "\n");
++NumGlobUses;
}
// If we nuked all of the loads, then none of the stores are needed either,
// nor is the global.
if (AllNonStoreUsesGone) {
if (isLeakCheckerRoot(GV)) {
Changed |= CleanupPointerRootUsers(GV, GetTLI);
} else {
Changed = true;
CleanupConstantGlobalUsers(GV, DL);
}
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
Changed = true;
GV->eraseFromParent();
++NumDeleted;
}
}
return Changed;
}
/// Walk the use list of V, constant folding all of the instructions that are
/// foldable.
static void ConstantPropUsersOf(Value *V, const DataLayout &DL,
TargetLibraryInfo *TLI) {
for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
if (Constant *NewC = ConstantFoldInstruction(I, DL, TLI)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
// Instructions could use V multiple times.
while (UI != E && *UI == I)
++UI;
if (isInstructionTriviallyDead(I, TLI))
I->eraseFromParent();
}
}
/// This function takes the specified global variable, and transforms the
/// program as if it always contained the result of the specified malloc.
/// Because it is always the result of the specified malloc, there is no reason
/// to actually DO the malloc. Instead, turn the malloc into a global, and
/// rewrite any loads of GV into uses of the new global.
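///
/// A sketch of the null-check handling on hypothetical IR (names invented):
/// a user such as
///   %p = load i8*, i8** @g
///   %c = icmp ne i8* %p, null
/// is rewritten to read a companion flag instead,
///   %c = load i1, i1* @g.init
/// where @g.init is kept only if such comparisons actually exist.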
static GlobalVariable *
OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
uint64_t AllocSize, Constant *InitVal,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
LLVM_DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI
<< '\n');
// Create global of type [AllocSize x i8].
Type *GlobalType = ArrayType::get(Type::getInt8Ty(GV->getContext()),
AllocSize);
// Create the new global variable. The contents of the allocated memory are
// undefined initially, so initialize with an undef value.
GlobalVariable *NewGV = new GlobalVariable(
*GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage,
UndefValue::get(GlobalType), GV->getName() + ".body", nullptr,
GV->getThreadLocalMode());
// Initialize the global at the point of the original call. Note that this
// is a different point from the initialization referred to below for the
// nullability handling. Subtlety: we have not proven that the original
// global was only initialized once. As such, we cannot fold this into the
// initializer of the new global, as we may need to re-init the storage
// multiple times.
if (!isa<UndefValue>(InitVal)) {
IRBuilder<> Builder(CI->getNextNode());
// TODO: Use alignment above if align!=1
Builder.CreateMemSet(NewGV, InitVal, AllocSize, None);
}
// Update users of the allocation to use the new global instead.
BitCastInst *TheBC = nullptr;
while (!CI->use_empty()) {
Instruction *User = cast<Instruction>(CI->user_back());
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
if (BCI->getType() == NewGV->getType()) {
BCI->replaceAllUsesWith(NewGV);
BCI->eraseFromParent();
} else {
BCI->setOperand(0, NewGV);
}
} else {
if (!TheBC)
TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
User->replaceUsesOfWith(CI, TheBC);
}
}
SmallSetVector<Constant *, 1> RepValues;
RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
GlobalVariable *InitBool =
new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".init", GV->getThreadLocalMode());
bool InitBoolUsed = false;
// Loop over all instruction uses of GV, processing them in turn.
SmallVector<Value *, 4> Guses;
allUsesOfLoadAndStores(GV, Guses);
for (auto *U : Guses) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// The global is initialized when the store to it occurs. If the stored
// value is the null value, the global bool is set to false; otherwise, true.
new StoreInst(ConstantInt::getBool(
GV->getContext(),
!isa<ConstantPointerNull>(SI->getValueOperand())),
InitBool, false, Align(1), SI->getOrdering(),
SI->getSyncScopeID(), SI);
SI->eraseFromParent();
continue;
}
LoadInst *LI = cast<LoadInst>(U);
while (!LI->use_empty()) {
Use &LoadUse = *LI->use_begin();
ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
if (!ICI) {
auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
RepValues.insert(CE);
LoadUse.set(CE);
continue;
}
// Replace the cmp X, 0 with a use of the bool value.
Value *LV = new LoadInst(InitBool->getValueType(), InitBool,
InitBool->getName() + ".val", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
case ICmpInst::ICMP_ULT: // X < null -> always false
LV = ConstantInt::getFalse(GV->getContext());
break;
case ICmpInst::ICMP_UGE: // X >= null -> always true
LV = ConstantInt::getTrue(GV->getContext());
break;
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_EQ:
LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
break; // no change.
}
ICI->replaceAllUsesWith(LV);
ICI->eraseFromParent();
}
LI->eraseFromParent();
}
// If the initialization boolean was used, insert it, otherwise delete it.
if (!InitBoolUsed) {
while (!InitBool->use_empty()) // Delete initializations
cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
// Now that GV is dead, nuke it and the allocation.
GV->eraseFromParent();
CI->eraseFromParent();
// To enable further optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
for (auto *CE : RepValues)
ConstantPropUsersOf(CE, DL, TLI);
return NewGV;
}
/// Scan the use-list of the allocation, checking that there are no complex
/// uses of the returned pointer. We permit simple things like dereferencing
/// the pointer, but not storing through the address, unless it is to the
/// specified global.
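///
/// For example (hypothetical IR): "store i8* %m, i8** @g" is acceptable,
/// while "store i8* %m, i8** @other" disqualifies the allocation, since the
/// pointer would escape into a second global.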
static bool
valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
const GlobalVariable *GV) {
SmallPtrSet<const Value *, 4> Visited;
SmallVector<const Value *, 4> Worklist;
Worklist.push_back(CI);
while (!Worklist.empty()) {
const Value *V = Worklist.pop_back_val();
if (!Visited.insert(V).second)
continue;
for (const Use &VUse : V->uses()) {
const User *U = VUse.getUser();
if (isa<LoadInst>(U) || isa<CmpInst>(U))
continue; // Fine, ignore.
if (auto *SI = dyn_cast<StoreInst>(U)) {
if (SI->getValueOperand() == V &&
SI->getPointerOperand()->stripPointerCasts() != GV)
return false; // Storing the pointer not into GV... bad.
continue; // Otherwise, storing through it, or storing into GV... fine.
}
if (auto *BCI = dyn_cast<BitCastInst>(U)) {
Worklist.push_back(BCI);
continue;
}
if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
Worklist.push_back(GEPI);
continue;
}
return false;
}
}
return true;
}
/// If we have a global that is only initialized with a fixed-size allocation,
/// try to transform the program to use global memory instead of
/// heap-allocated memory. This eliminates dynamic allocation, avoids an
/// indirection when accessing the data, and exposes the resultant global to
/// further GlobalOpt.
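///
/// For illustration (hypothetical IR), a typical candidate looks like
///   @g = internal global i8* null
///   %m = call i8* @malloc(i64 24)
///   store i8* %m, i8** @g
/// and is rewritten to use an internal global array as the storage instead.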
static bool tryToOptimizeStoreOfAllocationToGlobal(GlobalVariable *GV,
CallInst *CI,
const DataLayout &DL,
TargetLibraryInfo *TLI) {
if (!isRemovableAlloc(CI, TLI))
// Must be able to remove the call when we're done.
return false;
Type *Int8Ty = Type::getInt8Ty(CI->getFunction()->getContext());
Constant *InitVal = getInitialValueOfAllocation(CI, TLI, Int8Ty);
if (!InitVal)
// Must be able to emit a memset for initialization
return false;
uint64_t AllocSize;
if (!getObjectSize(CI, AllocSize, DL, TLI, ObjectSizeOpts()))
return false;
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (AllocSize >= 2048)
return false;
// We can't optimize this global unless all uses of it are *known* to be
// of the malloc value, not of the null initializer value (consider a use
// that compares the global's value against zero to see if the malloc has
// been reached). To do this, we check to see if all uses of the global
// would trap if the global were null: this proves that they must all
// happen after the malloc.
if (!allUsesOfLoadedValueWillTrapIfNull(GV))
return false;
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This check permits the
// malloc to be stored into the specified global, loaded, GEP'd, and icmp'd.
// These are all uses we can rewrite in terms of the new global.
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
return false;
OptimizeGlobalAddressOfAllocation(GV, CI, AllocSize, InitVal, DL, TLI);
return true;
}
// Try to optimize globals based on the knowledge that only one value (besides
// its initializer) is ever stored to the global.
static bool
optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
// If we are dealing with a pointer global that is initialized to null and
// only has one (non-null) value stored into it, then we can optimize any
// users of the loaded value (often calls and loads) that would trap if the
// value was null.
if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue() &&
StoredOnceVal->getType()->isPointerTy() &&
!NullPointerIsDefined(
nullptr /* F */,
GV->getInitializer()->getType()->getPointerAddressSpace())) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
return true;
} else if (isAllocationFn(StoredOnceVal, GetTLI)) {
if (auto *CI = dyn_cast<CallInst>(StoredOnceVal)) {
auto *TLI = &GetTLI(*CI->getFunction());
if (tryToOptimizeStoreOfAllocationToGlobal(GV, CI, DL, TLI))
return true;
}
}
}
return false;
}
/// At this point, we have learned that the only two values ever stored into GV
/// are its initializer and OtherVal. See if we can shrink the global into a
/// boolean and select between the two values whenever it is used. This exposes
/// the values to other scalar optimizations.
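///
/// Illustrative sketch (hypothetical IR): if @g is initialized to 0 and the
/// only other stored value is 42, each load becomes
///   %b = load i1, i1* @g.b
///   %v = select i1 %b, i32 42, i32 0
/// (a zext of %b suffices when the two values are exactly 0 and 1).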
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
Type *GVElType = GV->getValueType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
// an FP value, pointer or vector, don't do this optimization because a select
// between them is very expensive and unlikely to lead to later
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
if (GVElType == Type::getInt1Ty(GV->getContext()) ||
GVElType->isFloatingPointTy() ||
GVElType->isPointerTy() || GVElType->isVectorTy())
return false;
// Walk the use list of the global, checking that all the uses are loads or
// stores. If there is anything else, bail out.
for (User *U : GV->users()) {
if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
if (getLoadStoreType(U) != GVElType)
return false;
}
LLVM_DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n");
// Create the new global, initializing it to false.
GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
false,
GlobalValue::InternalLinkage,
ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
NewGV->copyAttributesFrom(GV);
GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
"No reason to shrink to bool!");
SmallVector<DIGlobalVariableExpression *, 1> GVs;
GV->getDebugInfo(GVs);
// If initialized to zero and storing one into the global, we can use a cast
// instead of a select to synthesize the desired value.
bool IsOneZero = false;
bool EmitOneOrZero = true;
auto *CI = dyn_cast<ConstantInt>(OtherVal);
if (CI && CI->getValue().getActiveBits() <= 64) {
IsOneZero = InitVal->isNullValue() && CI->isOne();
auto *CIInit = dyn_cast<ConstantInt>(GV->getInitializer());
if (CIInit && CIInit->getValue().getActiveBits() <= 64) {
uint64_t ValInit = CIInit->getZExtValue();
uint64_t ValOther = CI->getZExtValue();
uint64_t ValMinus = ValOther - ValInit;
for(auto *GVe : GVs){
DIGlobalVariable *DGV = GVe->getVariable();
DIExpression *E = GVe->getExpression();
const DataLayout &DL = GV->getParent()->getDataLayout();
unsigned SizeInOctets =
DL.getTypeAllocSizeInBits(NewGV->getValueType()) / 8;
// It is expected that the address of the optimized global variable is
// on top of the DWARF expression stack. After optimization, the value
// of that variable will be either 0 (the initial value) or 1 (the
// other value). The following expression computes a constant integer
// from the value at the global object's address:
// val * (ValOther - ValInit) + ValInit:
// DW_OP_deref DW_OP_constu <ValMinus>
// DW_OP_mul DW_OP_constu <ValInit> DW_OP_plus DW_OP_stack_value
SmallVector<uint64_t, 12> Ops = {
dwarf::DW_OP_deref_size, SizeInOctets,
dwarf::DW_OP_constu, ValMinus,
dwarf::DW_OP_mul, dwarf::DW_OP_constu, ValInit,
dwarf::DW_OP_plus};
bool WithStackValue = true;
E = DIExpression::prependOpcodes(E, Ops, WithStackValue);
DIGlobalVariableExpression *DGVE =
DIGlobalVariableExpression::get(NewGV->getContext(), DGV, E);
NewGV->addDebugInfo(DGVE);
}
EmitOneOrZero = false;
}
}
if (EmitOneOrZero) {
// FIXME: This will only emit the variable's address for the debugger;
// the location will only ever hold 0 or 1.
for(auto *GV : GVs)
NewGV->addDebugInfo(GV);
}
while (!GV->use_empty()) {
Instruction *UI = cast<Instruction>(GV->user_back());
if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
// Change the store into a boolean store.
bool StoringOther = SI->getOperand(0) == OtherVal;
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
if (StoringOther || SI->getOperand(0) == InitVal) {
StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
StoringOther);
} else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
// bool.
Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
// If we've already replaced the input, StoredVal will be a cast or
// select instruction. If not, it will be a load of the original
// global.
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
StoreVal = new LoadInst(NewGV->getValueType(), NewGV,
LI->getName() + ".b", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
"This is not a form that we understand!");
StoreVal = StoredVal->getOperand(0);
assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
}
}
StoreInst *NSI =
new StoreInst(StoreVal, NewGV, false, Align(1), SI->getOrdering(),
SI->getSyncScopeID(), SI);
NSI->setDebugLoc(SI->getDebugLoc());
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
LoadInst *NLI = new LoadInst(NewGV->getValueType(), NewGV,
LI->getName() + ".b", false, Align(1),
LI->getOrdering(), LI->getSyncScopeID(), LI);
Instruction *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
else
NSI = SelectInst::Create(NLI, OtherVal, InitVal, "", LI);
NSI->takeName(LI);
// Since LI is split into two instructions, NLI and NSI both inherit the
// same DebugLoc.
NLI->setDebugLoc(LI->getDebugLoc());
NSI->setDebugLoc(LI->getDebugLoc());
LI->replaceAllUsesWith(NSI);
}
UI->eraseFromParent();
}
// Retain the name of the old global variable. People who are debugging their
// programs may expect these variables to be named the same.
NewGV->takeName(GV);
GV->eraseFromParent();
return true;
}
static bool
deleteIfDead(GlobalValue &GV,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
function_ref<void(Function &)> DeleteFnCallback = nullptr) {
GV.removeDeadConstantUsers();
if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
return false;
if (const Comdat *C = GV.getComdat())
if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C))
return false;
bool Dead;
if (auto *F = dyn_cast<Function>(&GV))
Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
else
Dead = GV.use_empty();
if (!Dead)
return false;
LLVM_DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n");
if (auto *F = dyn_cast<Function>(&GV)) {
if (DeleteFnCallback)
DeleteFnCallback(*F);
}
GV.eraseFromParent();
++NumDeleted;
return true;
}
static bool isPointerValueDeadOnEntryToFunction(
const Function *F, GlobalValue *GV,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
// Find all uses of GV. We expect them all to be in F, and if we can't
// identify all of the uses, we bail out.
//
// On each of these uses, identify if the memory that GV points to is
// used/required/live at the start of the function. If it is not, for example
// if the first thing the function does is store to the GV, the GV can
// possibly be demoted.
//
// We don't do an exhaustive search for memory operations - simply look
// through bitcasts as they're quite common and benign.
const DataLayout &DL = GV->getParent()->getDataLayout();
SmallVector<LoadInst *, 4> Loads;
SmallVector<StoreInst *, 4> Stores;
for (auto *U : GV->users()) {
if (Operator::getOpcode(U) == Instruction::BitCast) {
for (auto *UU : U->users()) {
if (auto *LI = dyn_cast<LoadInst>(UU))
Loads.push_back(LI);
else if (auto *SI = dyn_cast<StoreInst>(UU))
Stores.push_back(SI);
else
return false;
}
continue;
}
Instruction *I = dyn_cast<Instruction>(U);
if (!I)
return false;
assert(I->getParent()->getParent() == F);
if (auto *LI = dyn_cast<LoadInst>(I))
Loads.push_back(LI);
else if (auto *SI = dyn_cast<StoreInst>(I))
Stores.push_back(SI);
else
return false;
}
// We have identified all uses of GV into loads and stores. Now check if all
// of them are known not to depend on the value of the global at the function
// entry point. We do this by ensuring that every load is dominated by at
// least one store.
auto &DT = LookupDomTree(*const_cast<Function *>(F));
// The check below is quadratic; make sure we're not going to do too many
// tests.
// FIXME: Even though this will always have worst-case quadratic time, we
// could put effort into minimizing the average time by putting stores that
// have been shown to dominate at least one load at the beginning of the
// Stores array, making subsequent dominance checks more likely to succeed
// early.
//
// The threshold here is fairly large because global->local demotion is a
// very powerful optimization should it fire.
const unsigned Threshold = 100;
if (Loads.size() * Stores.size() > Threshold)
return false;
for (auto *L : Loads) {
auto *LTy = L->getType();
if (none_of(Stores, [&](const StoreInst *S) {
auto *STy = S->getValueOperand()->getType();
// The load is only dominated by the store if DomTree says so
// and the number of bits loaded in L is less than or equal to
// the number of bits stored in S.
return DT.dominates(S, L) &&
DL.getTypeStoreSize(LTy).getFixedSize() <=
DL.getTypeStoreSize(STy).getFixedSize();
}))
return false;
}
// All loads have known dependences inside F, so the global can be localized.
return true;
}
/// C may have non-instruction users. Can all of those users be turned into
/// instructions?
static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
// We don't do this exhaustively. The most common pattern that we really need
// to care about is a constant GEP or constant bitcast - so we just look
// through a single ConstantExpr.
//
// The set of constants that this function returns true for must be ones that
// makeAllConstantUsesInstructions can handle.
for (auto *U : C->users()) {
if (isa<Instruction>(U))
continue;
if (!isa<ConstantExpr>(U))
// Non-instruction, non-ConstantExpr user; cannot convert this.
return false;
for (auto *UU : U->users())
if (!isa<Instruction>(UU))
// A ConstantExpr used by another constant. We don't try to recurse any
// further; just bail out at this point.
return false;
}
return true;
}
/// C may have non-instruction users, and
/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
/// non-instruction users to instructions.
static void makeAllConstantUsesInstructions(Constant *C) {
SmallVector<ConstantExpr*,4> Users;
for (auto *U : C->users()) {
if (isa<ConstantExpr>(U))
Users.push_back(cast<ConstantExpr>(U));
else
// We should never get here; allNonInstructionUsersCanBeMadeInstructions
// should not have returned true for C.
assert(
isa<Instruction>(U) &&
"Can't transform non-constantexpr non-instruction to instruction!");
}
SmallVector<Value*,4> UUsers;
for (auto *U : Users) {
UUsers.clear();
append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
Instruction *NewU = U->getAsInstruction(UI);
UI->replaceUsesOfWith(U, NewU);
}
// We've replaced all the uses, so destroy the constant. (destroyConstant
// will update value handles and metadata.)
U->destroyConstant();
}
}
// For a global variable with one store, if the store dominates any loads,
// those loads will always load the stored value (as opposed to the
// initializer), even in the presence of recursion.
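// Sketch on hypothetical IR: if the only store is "store i32 7, i32* @g" in
// @f, then any simple "load i32, i32* @g" in @f dominated by that store can
// be replaced by the constant 7.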
static bool forwardStoredOnceStore(
GlobalVariable *GV, const StoreInst *StoredOnceStore,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
const Value *StoredOnceValue = StoredOnceStore->getValueOperand();
// We can do this optimization for non-constants in nosync + norecurse
// functions, but globals used in exactly one norecurse function are already
// promoted to an alloca.
if (!isa<Constant>(StoredOnceValue))
return false;
const Function *F = StoredOnceStore->getFunction();
SmallVector<LoadInst *> Loads;
for (User *U : GV->users()) {
if (auto *LI = dyn_cast<LoadInst>(U)) {
if (LI->getFunction() == F &&
LI->getType() == StoredOnceValue->getType() && LI->isSimple())
Loads.push_back(LI);
}
}
// Only compute DT if we have any loads to examine.
bool MadeChange = false;
if (!Loads.empty()) {
auto &DT = LookupDomTree(*const_cast<Function *>(F));
for (auto *LI : Loads) {
if (DT.dominates(StoredOnceStore, LI)) {
LI->replaceAllUsesWith(const_cast<Value *>(StoredOnceValue));
LI->eraseFromParent();
MadeChange = true;
}
}
}
return MadeChange;
}
/// Analyze the specified global variable and optimize
/// it if possible. If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
// If this is a first class global and has only one accessing function and
// this function is non-recursive, we replace the global with a local alloca
// in this function.
//
// NOTE: It doesn't make sense to promote non-single-value types since we
// are just replacing static memory with stack memory.
//
// If the global is in a different address space, don't bring it onto the
// stack.
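//
// Sketch (hypothetical IR): @g = internal global i32 0, accessed only from
// a non-recursive function @f, becomes "%g = alloca i32" plus a store of
// the initializer at the top of @f's entry block.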
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction &&
GV->getValueType()->isSingleValueType() &&
GV->getType()->getAddressSpace() == 0 &&
!GV->isExternallyInitialized() &&
allNonInstructionUsersCanBeMadeInstructions(GV) &&
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
LookupDomTree)) {
const DataLayout &DL = GV->getParent()->getDataLayout();
LLVM_DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n");
Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
Type *ElemTy = GV->getValueType();
// FIXME: Pass Global's alignment when globals have alignment
AllocaInst *Alloca = new AllocaInst(ElemTy, DL.getAllocaAddrSpace(), nullptr,
GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
makeAllConstantUsesInstructions(GV);
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
return true;
}
bool Changed = false;
// If the global is never loaded (but may be stored to), it is dead.
// Delete it now.
if (!GS.IsLoaded) {
LLVM_DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n");
if (isLeakCheckerRoot(GV)) {
// Delete any constant stores to the global.
Changed = CleanupPointerRootUsers(GV, GetTLI);
} else {
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
Changed = CleanupConstantGlobalUsers(GV, DL);
}
// If the global is dead now, delete it.
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
Changed = true;
}
return Changed;
}
if (GS.StoredType <= GlobalStatus::InitializerStored) {
LLVM_DEBUG(dbgs() << "MARKING CONSTANT: " << *GV << "\n");
// Don't actually mark a global constant if it's atomic because atomic loads
// are implemented by a trivial cmpxchg in some edge-cases and that usually
// requires write access to the variable even if it's not actually changed.
if (GS.Ordering == AtomicOrdering::NotAtomic) {
assert(!GV->isConstant() && "Expected a non-constant global");
GV->setConstant(true);
Changed = true;
}
// Clean up any obviously simplifiable users now.
Changed |= CleanupConstantGlobalUsers(GV, DL);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Marking constant allowed us to simplify "
<< "all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
return true;
}
// Fall through to the next check; see if we can optimize further.
++NumMarked;
}
if (!GV->getInitializer()->getType()->isSingleValueType()) {
const DataLayout &DL = GV->getParent()->getDataLayout();
if (SRAGlobal(GV, DL))
return true;
}
Value *StoredOnceValue = GS.getStoredOnceValue();
if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
Function &StoreFn =
const_cast<Function &>(*GS.StoredOnceStore->getFunction());
bool CanHaveNonUndefGlobalInitializer =
GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
GV->getType()->getAddressSpace());
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
// global. This allows us to mark it constant.
// This is restricted to address spaces that allow globals to have
// initializers. NVPTX, for example, does not support initializers for
// shared memory (AS 3).
auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
if (SOVConstant && isa<UndefValue>(GV->getInitializer()) &&
DL.getTypeAllocSize(SOVConstant->getType()) ==
DL.getTypeAllocSize(GV->getValueType()) &&
CanHaveNonUndefGlobalInitializer) {
if (SOVConstant->getType() == GV->getValueType()) {
// Change the initializer in place.
GV->setInitializer(SOVConstant);
} else {
// Create a new global with adjusted type.
auto *NGV = new GlobalVariable(
*GV->getParent(), SOVConstant->getType(), GV->isConstant(),
GV->getLinkage(), SOVConstant, "", GV, GV->getThreadLocalMode(),
GV->getAddressSpace());
NGV->takeName(GV);
NGV->copyAttributesFrom(GV);
GV->replaceAllUsesWith(ConstantExpr::getBitCast(NGV, GV->getType()));
GV->eraseFromParent();
GV = NGV;
}
// Clean up any obviously simplifiable users now.
CleanupConstantGlobalUsers(GV, DL);
if (GV->use_empty()) {
LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
<< "simplify all users and delete global!\n");
GV->eraseFromParent();
++NumDeleted;
}
++NumSubstitute;
return true;
}
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (optimizeOnceStoredGlobal(GV, StoredOnceValue, DL, GetTLI))
return true;
// Try to forward the store to any loads. If we have more than one store, we
// may have a store of the initializer between StoredOnceStore and a load.
if (GS.NumStores == 1)
if (forwardStoredOnceStore(GV, GS.StoredOnceStore, LookupDomTree))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean. Skip this optimization for address spaces that don't allow an
// initializer.
if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
(!isa<UndefValue>(GV->getInitializer()) ||
CanHaveNonUndefGlobalInitializer)) {
if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
++NumShrunkToBool;
return true;
}
}
}
return Changed;
}
/// Analyze the specified global variable and optimize it if possible. If we
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
return false;
GlobalStatus GS;
if (GlobalStatus::analyzeGlobal(&GV, GS))
return false;
bool Changed = false;
if (!GS.IsCompared && !GV.hasGlobalUnnamedAddr()) {
auto NewUnnamedAddr = GV.hasLocalLinkage() ? GlobalValue::UnnamedAddr::Global
: GlobalValue::UnnamedAddr::Local;
if (NewUnnamedAddr != GV.getUnnamedAddr()) {
GV.setUnnamedAddr(NewUnnamedAddr);
NumUnnamed++;
Changed = true;
}
}
// Do more involved optimizations if the global is internal.
if (!GV.hasLocalLinkage())
return Changed;
auto *GVar = dyn_cast<GlobalVariable>(&GV);
if (!GVar)
return Changed;
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
/// FastCC.
static void ChangeCalleesToFastCall(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
cast<CallBase>(U)->setCallingConv(CallingConv::Fast);
}
}
static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
Attribute::AttrKind A) {
unsigned AttrIndex;
if (Attrs.hasAttrSomewhere(A, &AttrIndex))
return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
return Attrs;
}
static void RemoveAttribute(Function *F, Attribute::AttrKind A) {
F->setAttributes(StripAttr(F->getContext(), F->getAttributes(), A));
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallBase *CB = cast<CallBase>(U);
CB->setAttributes(StripAttr(F->getContext(), CB->getAttributes(), A));
}
}
/// Return true if this is a calling convention that we'd like to change. The
/// idea here is that we don't want to mess with the convention if the user
/// explicitly requested something with performance implications like coldcc,
/// GHC, or anyregcc.
static bool hasChangeableCC(Function *F) {
CallingConv::ID CC = F->getCallingConv();
// FIXME: Is it worth transforming x86_stdcallcc and x86_fastcallcc?
if (CC != CallingConv::C && CC != CallingConv::X86_ThisCall)
return false;
// FIXME: Change CC for the whole chain of musttail calls when possible.
//
// Can't change the CC of a function that either has musttail calls or is
// a musttail callee itself.
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
CallInst* CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
if (CI->isMustTailCall())
return false;
}
for (BasicBlock &BB : *F)
if (BB.getTerminatingMustTailCall())
return false;
return true;
}
/// Return true if the block containing the call site has a BlockFrequency of
/// less than ColdCCRelFreq% of the entry block.
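/// For example, if ColdCCRelFreq were 2, a call site would qualify only when
/// its block frequency is below 2% of the caller's entry block frequency.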
static bool isColdCallSite(CallBase &CB, BlockFrequencyInfo &CallerBFI) {
const BranchProbability ColdProb(ColdCCRelFreq, 100);
auto *CallSiteBB = CB.getParent();
auto CallSiteFreq = CallerBFI.getBlockFreq(CallSiteBB);
auto CallerEntryFreq =
CallerBFI.getBlockFreq(&(CB.getCaller()->getEntryBlock()));
return CallSiteFreq < CallerEntryFreq * ColdProb;
}
// This function checks if the input function F is cold at all call sites. It
// also looks at each call site's containing function, returning false if the
// caller contains other non-cold calls. The input vector AllCallsCold
// contains a list of functions that only have call sites in cold blocks.
static bool
isValidCandidateForColdCC(Function &F,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
const std::vector<Function *> &AllCallsCold) {
if (F.user_empty())
return false;
for (User *U : F.users()) {
if (isa<BlockAddress>(U))
continue;
CallBase &CB = cast<CallBase>(*U);
Function *CallerFunc = CB.getParent()->getParent();
BlockFrequencyInfo &CallerBFI = GetBFI(*CallerFunc);
if (!isColdCallSite(CB, CallerBFI))
return false;
if (!llvm::is_contained(AllCallsCold, CallerFunc))
return false;
}
return true;
}
static void changeCallSitesToColdCC(Function *F) {
for (User *U : F->users()) {
if (isa<BlockAddress>(U))
continue;
cast<CallBase>(U)->setCallingConv(CallingConv::Cold);
}
}
// This function iterates over all the call instructions in the input Function
// and checks that all call sites are in cold blocks and are allowed to use the
// coldcc calling convention.
static bool
hasOnlyColdCalls(Function &F,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
if (CallInst *CI = dyn_cast<CallInst>(&I)) {
// Skip over inline asm instructions since they aren't function calls.
if (CI->isInlineAsm())
continue;
Function *CalledFn = CI->getCalledFunction();
if (!CalledFn)
return false;
if (!CalledFn->hasLocalLinkage())
return false;
// Skip over intrinsics since they won't remain as function calls.
if (CalledFn->getIntrinsicID() != Intrinsic::not_intrinsic)
continue;
// Check if it's valid to use coldcc calling convention.
if (!hasChangeableCC(CalledFn) || CalledFn->isVarArg() ||
CalledFn->hasAddressTaken())
return false;
BlockFrequencyInfo &CallerBFI = GetBFI(F);
if (!isColdCallSite(*CI, CallerBFI))
return false;
}
}
}
return true;
}
static bool hasMustTailCallers(Function *F) {
for (User *U : F->users()) {
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB) {
assert(isa<BlockAddress>(U) &&
"Expected either CallBase or BlockAddress");
continue;
}
if (CB->isMustTailCall())
return true;
}
return false;
}
static bool hasInvokeCallers(Function *F) {
for (User *U : F->users())
if (isa<InvokeInst>(U))
return true;
return false;
}
static void RemovePreallocated(Function *F) {
RemoveAttribute(F, Attribute::Preallocated);
auto *M = F->getParent();
IRBuilder<> Builder(M->getContext());
// Cannot modify users() while iterating over it, so make a copy.
SmallVector<User *, 4> PreallocatedCalls(F->users());
for (User *U : PreallocatedCalls) {
CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
assert(
!CB->isMustTailCall() &&
"Shouldn't call RemotePreallocated() on a musttail preallocated call");
// Create copy of call without "preallocated" operand bundle.
SmallVector<OperandBundleDef, 1> OpBundles;
CB->getOperandBundlesAsDefs(OpBundles);
CallBase *PreallocatedSetup = nullptr;
for (auto *It = OpBundles.begin(); It != OpBundles.end(); ++It) {
if (It->getTag() == "preallocated") {
PreallocatedSetup = cast<CallBase>(*It->input_begin());
OpBundles.erase(It);
break;
}
}
assert(PreallocatedSetup && "Did not find preallocated bundle");
uint64_t ArgCount =
cast<ConstantInt>(PreallocatedSetup->getArgOperand(0))->getZExtValue();
assert((isa<CallInst>(CB) || isa<InvokeInst>(CB)) &&
"Unknown indirect call type");
CallBase *NewCB = CallBase::Create(CB, OpBundles, CB);
CB->replaceAllUsesWith(NewCB);
NewCB->takeName(CB);
CB->eraseFromParent();
Builder.SetInsertPoint(PreallocatedSetup);
auto *StackSave =
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave));
Builder.SetInsertPoint(NewCB->getNextNonDebugInstruction());
Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
StackSave);
// Replace @llvm.call.preallocated.arg() with alloca.
// Cannot modify users() while iterating over it, so make a copy.
// @llvm.call.preallocated.arg() can be called with the same index multiple
// times. So for each @llvm.call.preallocated.arg(), we see if we have
// already created a Value* for the index, and if not, create an alloca and
// bitcast right after the @llvm.call.preallocated.setup() so that it
// dominates all uses.
SmallVector<Value *, 2> ArgAllocas(ArgCount);
SmallVector<User *, 2> PreallocatedArgs(PreallocatedSetup->users());
for (auto *User : PreallocatedArgs) {
auto *UseCall = cast<CallBase>(User);
assert(UseCall->getCalledFunction()->getIntrinsicID() ==
Intrinsic::call_preallocated_arg &&
"preallocated token use was not a llvm.call.preallocated.arg");
uint64_t AllocArgIndex =
cast<ConstantInt>(UseCall->getArgOperand(1))->getZExtValue();
Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
if (!AllocaReplacement) {
auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
auto *ArgType =
UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
Builder.SetInsertPoint(InsertBefore);
auto *Alloca =
Builder.CreateAlloca(ArgType, AddressSpace, nullptr, "paarg");
auto *BitCast = Builder.CreateBitCast(
Alloca, Type::getInt8PtrTy(M->getContext()), UseCall->getName());
ArgAllocas[AllocArgIndex] = BitCast;
AllocaReplacement = BitCast;
}
UseCall->replaceAllUsesWith(AllocaReplacement);
UseCall->eraseFromParent();
}
// Remove @llvm.call.preallocated.setup().
cast<Instruction>(PreallocatedSetup)->eraseFromParent();
}
}
static bool
OptimizeFunctions(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats,
function_ref<void(Function &F)> ChangedCFGCallback,
function_ref<void(Function &F)> DeleteFnCallback) {
bool Changed = false;
std::vector<Function *> AllCallsCold;
for (Function &F : llvm::make_early_inc_range(M))
if (hasOnlyColdCalls(F, GetBFI))
AllCallsCold.push_back(&F);
// Optimize functions.
for (Function &F : llvm::make_early_inc_range(M)) {
// Don't perform global opt pass on naked functions; we don't want fast
// calling conventions for naked functions.
if (F.hasFnAttribute(Attribute::Naked))
continue;
// Functions without names cannot be referenced outside this module.
if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
F.setLinkage(GlobalValue::InternalLinkage);
if (deleteIfDead(F, NotDiscardableComdats, DeleteFnCallback)) {
Changed = true;
continue;
}
// LLVM's definition of dominance allows instructions that are cyclic
// in unreachable blocks, e.g.:
// %pat = select i1 %condition, @global, i16* %pat
// because any instruction dominates an instruction in a block that's
// not reachable from entry.
// So, remove unreachable blocks from the function, because a) there's
// no point in analyzing them and b) GlobalOpt should otherwise grow
// some more complicated logic to break these cycles.
// Notify the analysis manager that we've modified the function's CFG.
if (!F.isDeclaration()) {
if (removeUnreachableBlocks(F)) {
Changed = true;
ChangedCFGCallback(F);
}
}
Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
if (!F.hasLocalLinkage())
continue;
// If we have an inalloca parameter that we can safely remove the
// inalloca attribute from, do so. This unlocks optimizations that
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist allocas affected by this to the entry
// block if possible.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
- !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
+ !F.hasAddressTaken() && !hasMustTailCallers(&F) && !F.isVarArg()) {
RemoveAttribute(&F, Attribute::InAlloca);
Changed = true;
}
// FIXME: handle invokes
// FIXME: handle musttail
if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
!hasInvokeCallers(&F)) {
RemovePreallocated(&F);
Changed = true;
}
continue;
}
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
NumInternalFunc++;
TargetTransformInfo &TTI = GetTTI(F);
// Change the calling convention to coldcc if either stress testing is
// enabled or the target would like to use coldcc on functions which are
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
(TTI.useColdCCForColdCall(F) &&
isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
F.setCallingConv(CallingConv::Cold);
changeCallSitesToColdCC(&F);
Changed = true;
NumColdCC++;
}
}
if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
F.setCallingConv(CallingConv::Fast);
ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
}
if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
!F.hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
RemoveAttribute(&F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
}
return Changed;
}
static bool
OptimizeGlobalVars(Module &M,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global variables without names cannot be referenced outside this module.
if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
GV.setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
if (GV.hasInitializer())
if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
auto &DL = M.getDataLayout();
// TLI is not used in the case of a Constant, so use default nullptr
// for that optional parameter, since we don't have a Function to
// provide GetTLI anyway.
Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New != C)
GV.setInitializer(New);
}
if (deleteIfDead(GV, NotDiscardableComdats)) {
Changed = true;
continue;
}
Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
}
/// Evaluate static constructors in the function, if we can. Return true if we
/// can, false otherwise.
static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL,
TargetLibraryInfo *TLI) {
// Skip external functions.
if (F->isDeclaration())
return false;
// Call the function.
Evaluator Eval(DL, TLI);
Constant *RetValDummy;
bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
SmallVector<Constant*, 0>());
if (EvalSuccess) {
++NumCtorsEvaluated;
// We succeeded at evaluation: commit the result.
auto NewInitializers = Eval.getMutatedInitializers();
LLVM_DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
<< F->getName() << "' to " << NewInitializers.size()
<< " stores.\n");
for (const auto &Pair : NewInitializers)
Pair.first->setInitializer(Pair.second);
for (GlobalVariable *GV : Eval.getInvariants())
GV->setConstant(true);
}
return EvalSuccess;
}
static int compareNames(Constant *const *A, Constant *const *B) {
Value *AStripped = (*A)->stripPointerCasts();
Value *BStripped = (*B)->stripPointerCasts();
return AStripped->getName().compare(BStripped->getName());
}
static void setUsedInitializer(GlobalVariable &V,
const SmallPtrSetImpl<GlobalValue *> &Init) {
if (Init.empty()) {
V.eraseFromParent();
return;
}
// Type of pointer to the array of pointers.
PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
SmallVector<Constant *, 8> UsedArray;
for (GlobalValue *GV : Init) {
Constant *Cast
= ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
UsedArray.push_back(Cast);
}
// Sort to get deterministic order.
array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
Module *M = V.getParent();
V.removeFromParent();
GlobalVariable *NV =
new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
ConstantArray::get(ATy, UsedArray), "");
NV->takeName(&V);
NV->setSection("llvm.metadata");
delete &V;
}
namespace {
/// An easy to access representation of llvm.used and llvm.compiler.used.
class LLVMUsed {
SmallPtrSet<GlobalValue *, 4> Used;
SmallPtrSet<GlobalValue *, 4> CompilerUsed;
GlobalVariable *UsedV;
GlobalVariable *CompilerUsedV;
public:
LLVMUsed(Module &M) {
SmallVector<GlobalValue *, 4> Vec;
UsedV = collectUsedGlobalVariables(M, Vec, false);
Used = {Vec.begin(), Vec.end()};
Vec.clear();
CompilerUsedV = collectUsedGlobalVariables(M, Vec, true);
CompilerUsed = {Vec.begin(), Vec.end()};
}
using iterator = SmallPtrSet<GlobalValue *, 4>::iterator;
using used_iterator_range = iterator_range<iterator>;
iterator usedBegin() { return Used.begin(); }
iterator usedEnd() { return Used.end(); }
used_iterator_range used() {
return used_iterator_range(usedBegin(), usedEnd());
}
iterator compilerUsedBegin() { return CompilerUsed.begin(); }
iterator compilerUsedEnd() { return CompilerUsed.end(); }
used_iterator_range compilerUsed() {
return used_iterator_range(compilerUsedBegin(), compilerUsedEnd());
}
bool usedCount(GlobalValue *GV) const { return Used.count(GV); }
bool compilerUsedCount(GlobalValue *GV) const {
return CompilerUsed.count(GV);
}
bool usedErase(GlobalValue *GV) { return Used.erase(GV); }
bool compilerUsedErase(GlobalValue *GV) { return CompilerUsed.erase(GV); }
bool usedInsert(GlobalValue *GV) { return Used.insert(GV).second; }
bool compilerUsedInsert(GlobalValue *GV) {
return CompilerUsed.insert(GV).second;
}
void syncVariablesAndSets() {
if (UsedV)
setUsedInitializer(*UsedV, Used);
if (CompilerUsedV)
setUsedInitializer(*CompilerUsedV, CompilerUsed);
}
};
} // end anonymous namespace
static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
if (GA.use_empty()) // No use at all.
return false;
assert((!U.usedCount(&GA) || !U.compilerUsedCount(&GA)) &&
"We should have removed the duplicated "
"element from llvm.compiler.used");
if (!GA.hasOneUse())
// Strictly more than one use. So at least one use is in neither llvm.used
// nor llvm.compiler.used.
return true;
// Exactly one use. Check if it is in llvm.used or llvm.compiler.used.
return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}
static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
const LLVMUsed &U) {
unsigned N = 2;
assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
"We should have removed the duplicated "
"element from llvm.compiler.used");
if (U.usedCount(&V) || U.compilerUsedCount(&V))
++N;
return V.hasNUsesOrMore(N);
}
static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
if (!GA.hasLocalLinkage())
return true;
return U.usedCount(&GA) || U.compilerUsedCount(&GA);
}
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
bool &RenameTarget) {
RenameTarget = false;
bool Ret = false;
if (hasUseOtherThanLLVMUsed(GA, U))
Ret = true;
// If the alias is externally visible, we may still be able to simplify it.
if (!mayHaveOtherReferences(GA, U))
return Ret;
// If the aliasee has internal linkage, give it the name and linkage
// of the alias, and delete the alias. This turns:
// define internal ... @f(...)
// @a = alias ... @f
// into:
// define ... @a(...)
Constant *Aliasee = GA.getAliasee();
GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
if (!Target->hasLocalLinkage())
return Ret;
// Do not perform the transform if multiple aliases potentially target the
// aliasee. This check also ensures that it is safe to replace the section
// and other attributes of the aliasee with those of the alias.
if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
return Ret;
RenameTarget = true;
return true;
}
static bool
OptimizeGlobalAliases(Module &M,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
LLVMUsed Used(M);
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
// Return whether GV is explicitly or implicitly dso_local and not replaceable
// by another definition in the current linkage unit.
auto IsModuleLocal = [](GlobalValue &GV) {
return !GlobalValue::isInterposableLinkage(GV.getLinkage()) &&
(GV.isDSOLocal() || GV.isImplicitDSOLocal());
};
for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
J.setLinkage(GlobalValue::InternalLinkage);
if (deleteIfDead(J, NotDiscardableComdats)) {
Changed = true;
continue;
}
// If the alias can change at link time, nothing can be done - bail out.
if (!IsModuleLocal(J))
continue;
Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
// if the aliasee may be preemptible at runtime. On ELF, a non-preemptible
// alias can be used to access the definition as if preemption did not
// happen.
// TODO: Try to handle non-zero GEPs of local aliasees.
if (!Target || !IsModuleLocal(*Target))
continue;
Target->removeDeadConstantUsers();
// Make all users of the alias use the aliasee instead.
bool RenameTarget;
if (!hasUsesToReplace(J, Used, RenameTarget))
continue;
J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
++NumAliasesResolved;
Changed = true;
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
Target->takeName(&J);
Target->setLinkage(J.getLinkage());
Target->setDSOLocal(J.isDSOLocal());
Target->setVisibility(J.getVisibility());
Target->setDLLStorageClass(J.getDLLStorageClass());
if (Used.usedErase(&J))
Used.usedInsert(Target);
if (Used.compilerUsedErase(&J))
Used.compilerUsedInsert(Target);
} else if (mayHaveOtherReferences(J, Used))
continue;
// Delete the alias.
M.getAliasList().erase(&J);
++NumAliasesRemoved;
Changed = true;
}
Used.syncVariablesAndSets();
return Changed;
}
static Function *
FindCXAAtExit(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI) {
// Hack to get a default TLI before we have an actual Function.
auto FuncIter = M.begin();
if (FuncIter == M.end())
return nullptr;
auto *TLI = &GetTLI(*FuncIter);
LibFunc F = LibFunc_cxa_atexit;
if (!TLI->has(F))
return nullptr;
Function *Fn = M.getFunction(TLI->getName(F));
if (!Fn)
return nullptr;
// Now get the actual TLI for Fn.
TLI = &GetTLI(*Fn);
// Make sure that the function has the correct prototype.
if (!TLI->getLibFunc(*Fn, F) || F != LibFunc_cxa_atexit)
return nullptr;
return Fn;
}
/// Returns whether the given function is an empty C++ destructor and can
/// therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code, so we simply check for 'ret'.
static bool cxxDtorIsEmpty(const Function &Fn) {
// FIXME: We could eliminate C++ destructors if they're readonly/readnone and
// nounwind, but that doesn't seem worth doing.
if (Fn.isDeclaration())
return false;
for (auto &I : Fn.getEntryBlock()) {
if (I.isDebugOrPseudoInst())
continue;
if (isa<ReturnInst>(I))
return true;
break;
}
return false;
}
static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
/// Itanium C++ ABI p3.3.5:
///
/// After constructing a global (or local static) object, that will require
/// destruction on exit, a termination function is registered as follows:
///
/// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d );
///
/// This registration, e.g. __cxa_atexit(f,p,d), is intended to cause the
/// call f(p) when DSO d is unloaded, before all such termination calls
/// registered before this one. It returns zero if registration is
/// successful, nonzero on failure.
// This pass will look for calls to __cxa_atexit where the function is trivial
// and remove them.
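// For example (hypothetical IR), a registration such as
//   %r = call i32 @__cxa_atexit(void (i8*)* @empty_dtor, i8* @obj,
//                               i8* @__dso_handle)
// where @empty_dtor immediately returns is erased, and %r is folded to 0.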
bool Changed = false;
for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
// to __cxa_atexit.
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
Function *DtorFn =
dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
if (!DtorFn || !cxxDtorIsEmpty(*DtorFn))
continue;
// Just remove the call.
CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
CI->eraseFromParent();
++NumCXXDtorsRemoved;
Changed |= true;
}
return Changed;
}
static bool
optimizeGlobalsInModule(Module &M, const DataLayout &DL,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
function_ref<void(Function &F)> ChangedCFGCallback,
function_ref<void(Function &F)> DeleteFnCallback) {
SmallPtrSet<const Comdat *, 8> NotDiscardableComdats;
bool Changed = false;
bool LocalChange = true;
Optional<uint32_t> FirstNotFullyEvaluatedPriority;
while (LocalChange) {
LocalChange = false;
NotDiscardableComdats.clear();
for (const GlobalVariable &GV : M.globals())
if (const Comdat *C = GV.getComdat())
if (!GV.isDiscardableIfUnused() || !GV.use_empty())
NotDiscardableComdats.insert(C);
for (Function &F : M)
if (const Comdat *C = F.getComdat())
if (!F.isDefTriviallyDead())
NotDiscardableComdats.insert(C);
for (GlobalAlias &GA : M.aliases())
if (const Comdat *C = GA.getComdat())
if (!GA.isDiscardableIfUnused() || !GA.use_empty())
NotDiscardableComdats.insert(C);
// Delete functions that are trivially dead, ccc -> fastcc
LocalChange |= OptimizeFunctions(M, GetTLI, GetTTI, GetBFI, LookupDomTree,
NotDiscardableComdats, ChangedCFGCallback,
DeleteFnCallback);
// Optimize global_ctors list.
LocalChange |=
optimizeGlobalCtorsList(M, [&](uint32_t Priority, Function *F) {
if (FirstNotFullyEvaluatedPriority &&
*FirstNotFullyEvaluatedPriority != Priority)
return false;
bool Evaluated = EvaluateStaticConstructor(F, DL, &GetTLI(*F));
if (!Evaluated)
FirstNotFullyEvaluatedPriority = Priority;
return Evaluated;
});
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
// Try to remove trivial global destructors if they are not removed
// already.
Function *CXAAtExitFn = FindCXAAtExit(M, GetTLI);
if (CXAAtExitFn)
LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn);
Changed |= LocalChange;
}
// TODO: Move all global ctors functions to the end of the module for code
// layout.
return Changed;
}
PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &DL = M.getDataLayout();
auto &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupDomTree = [&FAM](Function &F) -> DominatorTree & {
return FAM.getResult<DominatorTreeAnalysis>(F);
};
auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
return FAM.getResult<BlockFrequencyAnalysis>(F);
};
auto ChangedCFGCallback = [&FAM](Function &F) {
FAM.invalidate(F, PreservedAnalyses::none());
};
auto DeleteFnCallback = [&FAM](Function &F) { FAM.clear(F, F.getName()); };
if (!optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
ChangedCFGCallback, DeleteFnCallback))
return PreservedAnalyses::all();
PreservedAnalyses PA = PreservedAnalyses::none();
// We made sure to clear analyses for deleted functions.
PA.preserve<FunctionAnalysisManagerModuleProxy>();
// The only place we modify the CFG is when calling
// removeUnreachableBlocks(), but there we make sure to invalidate analyses
// for modified functions.
PA.preserveSet<CFGAnalyses>();
return PA;
}
namespace {
struct GlobalOptLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
GlobalOptLegacyPass() : ModulePass(ID) {
initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
auto &DL = M.getDataLayout();
auto LookupDomTree = [this](Function &F) -> DominatorTree & {
return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
};
auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
};
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
};
auto ChangedCFGCallback = [&LookupDomTree](Function &F) {
auto &DT = LookupDomTree(F);
DT.recalculate(F);
};
return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
ChangedCFGCallback, nullptr);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
} // end anonymous namespace
char GlobalOptLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() {
return new GlobalOptLegacyPass();
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bc01d2ef7fe2..52596b30494f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1,3632 +1,3636 @@
//===- InstCombineCalls.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall, visitInvoke, and visitCallBr functions.
//
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>
#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm;
using namespace PatternMatch;
STATISTIC(NumSimplified, "Number of library calls simplified");
static cl::opt<unsigned> GuardWideningWindow(
"instcombine-guard-widening-window",
cl::init(3),
cl::desc("How wide an instruction window to bypass looking for "
"another guard"));
namespace llvm {
/// Enable preservation of attributes in assumes, e.g.:
/// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
extern cl::opt<bool> EnableKnowledgeRetention;
} // namespace llvm
/// Return the specified type promoted as it would be to pass through a va_arg
/// area.
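/// Integer types narrower than 32 bits (e.g. i8, i16) are promoted to i32;
/// everything else is returned unchanged.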
static Type *getPromotedType(Type *Ty) {
if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
if (ITy->getBitWidth() < 32)
return Type::getInt32Ty(Ty->getContext());
}
return Ty;
}
/// Recognize a memcpy/memmove from an otherwise trivially unused alloca.
/// TODO: This should probably be integrated with visitAllocSites, but that
/// requires a deeper change to allow either unread or unwritten objects.
static bool hasUndefSource(AnyMemTransferInst *MI) {
auto *Src = MI->getRawSource();
while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
if (!Src->hasOneUse())
return false;
Src = cast<Instruction>(Src)->getOperand(0);
}
return isa<AllocaInst>(Src) && Src->hasOneUse();
}
Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
MaybeAlign CopyDstAlign = MI->getDestAlign();
if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
MI->setDestAlignment(DstAlign);
return MI;
}
Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
MaybeAlign CopySrcAlign = MI->getSourceAlign();
if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
MI->setSourceAlignment(SrcAlign);
return MI;
}
// If we have a store to a location which is known constant, we can conclude
// that the store must be storing the constant value (else the memory
// wouldn't be constant), and this must be a noop.
if (AA->pointsToConstantMemory(MI->getDest())) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// If the source is provably undef, the memcpy/memmove doesn't do anything
// (unless the transfer is volatile).
if (hasUndefSource(MI) && !MI->isVolatile()) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
if (!MemOpLength) return nullptr;
// Source and destination pointer types are always "i8*" for the intrinsic. See
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
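// For example (illustrative), a 4-byte copy becomes:
//   %v = load i32, i32* %src
//   store i32 %v, i32* %dst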
uint64_t Size = MemOpLength->getLimitedValue();
assert(Size && "0-sized memory transfers should be removed already.");
if (Size > 8 || (Size&(Size-1)))
return nullptr; // If not 1/2/4/8 bytes, exit.
// If it is an atomic transfer and the alignment is less than the size, we
// would introduce an unaligned memory access, which CodeGen would later
// turn into a libcall. That is no evident performance gain, so disable the
// transform for now.
if (isa<AtomicMemTransferInst>(MI))
if (*CopyDstAlign < Size || *CopySrcAlign < Size)
return nullptr;
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
unsigned DstAddrSp =
cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
// If the memcpy has metadata describing the members, see if we can get the
// TBAA tag describing our copy.
MDNode *CopyMD = nullptr;
if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
CopyMD = M;
} else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
if (M->getNumOperands() == 3 && M->getOperand(0) &&
mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
M->getOperand(1) &&
mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
Size &&
M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
CopyMD = cast<MDNode>(M->getOperand(2));
}
Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder.CreateLoad(IntType, Src);
// Alignment from the mem intrinsic will be better, so use it.
L->setAlignment(*CopySrcAlign);
if (CopyMD)
L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
MDNode *LoopMemParallelMD =
MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
if (LoopMemParallelMD)
L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
if (AccessGroupMD)
L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
StoreInst *S = Builder.CreateStore(L, Dest);
// Alignment from the mem intrinsic will be better, so use it.
S->setAlignment(*CopyDstAlign);
if (CopyMD)
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
if (LoopMemParallelMD)
S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
if (AccessGroupMD)
S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
// non-atomics can be volatile
L->setVolatile(MT->isVolatile());
S->setVolatile(MT->isVolatile());
}
if (isa<AtomicMemTransferInst>(MI)) {
// atomics have to be unordered
L->setOrdering(AtomicOrdering::Unordered);
S->setOrdering(AtomicOrdering::Unordered);
}
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MemOpLength->getType()));
return MI;
}
Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
const Align KnownAlignment =
getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
MaybeAlign MemSetAlign = MI->getDestAlign();
if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
MI->setDestAlignment(KnownAlignment);
return MI;
}
// If we have a store to a location which is known constant, we can conclude
// that the store must be storing the constant value (else the memory
// wouldn't be constant), and this must be a noop.
if (AA->pointsToConstantMemory(MI->getDest())) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// Remove memset with an undef value.
// FIXME: This is technically incorrect because it might overwrite a poison
// value. Change to PoisonValue once #52930 is resolved.
if (isa<UndefValue>(MI->getValue())) {
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
return MI;
}
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
return nullptr;
const uint64_t Len = LenC->getLimitedValue();
assert(Len && "0-sized memsets should be removed already.");
const Align Alignment = MI->getDestAlign().valueOrOne();
// If it is an atomic memset and the alignment is less than the size, we
// would introduce an unaligned memory access, which CodeGen would later
// turn into a libcall. That is no evident performance gain, so disable the
// transform for now.
if (isa<AtomicMemSetInst>(MI))
if (Alignment < Len)
return nullptr;
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
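// Multiplying by 0x0101010101010101 replicates the byte into every byte
// lane; e.g. a fill byte of 0xAB with Len == 4 stores the i32 0xABABABAB.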
StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
MI->isVolatile());
S->setAlignment(Alignment);
if (isa<AtomicMemSetInst>(MI))
S->setOrdering(AtomicOrdering::Unordered);
// Set the size of the copy to 0; it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;
}
return nullptr;
}
// TODO: Obvious missing transforms:
// * Narrow width by halves excluding zero/undef lanes
Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
Value *LoadPtr = II.getArgOperand(0);
const Align Alignment =
cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
// If the mask is all ones or undefs, this is a plain vector load of the 1st
// argument.
if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
L->copyMetadata(II);
return L;
}
// If we can unconditionally load from this address, replace with a
// load/select idiom. TODO: use DT for a context-sensitive query.
if (isDereferenceablePointer(LoadPtr, II.getType(),
II.getModule()->getDataLayout(), &II, nullptr)) {
LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
"unmaskedload");
LI->copyMetadata(II);
return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
}
return nullptr;
}
// TODO: Obvious missing transforms:
// * Single constant active lane -> store
// * Narrow width by halves excluding zero/undef lanes
Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, this instruction does nothing.
if (ConstMask->isNullValue())
return eraseInstFromFunction(II);
// If the mask is all ones, this is a plain vector store of the 1st argument.
if (ConstMask->isAllOnesValue()) {
Value *StorePtr = II.getArgOperand(1);
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
StoreInst *S =
new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
S->copyMetadata(II);
return S;
}
if (isa<ScalableVectorType>(ConstMask->getType()))
return nullptr;
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
APInt UndefElts(DemandedElts.getBitWidth(), 0);
if (Value *V =
SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
return replaceOperand(II, 0, V);
return nullptr;
}
// TODO: Obvious missing transforms:
// * Single constant active lane load -> load
// * Dereferenceable address & few lanes -> scalarize speculative load/selects
// * Adjacent vector addresses -> masked.load
// * Narrow width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked load
Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
if (!ConstMask)
return nullptr;
// Vector splat address w/known mask -> scalar load
// Fold the gather to a load of the first lane of the source vector,
// because every lane reloads the same value.
if (ConstMask->isAllOnesValue())
if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
auto *VecTy = cast<VectorType>(II.getType());
const Align Alignment =
cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
Alignment, "load.scalar");
Value *Shuf =
Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
return replaceInstUsesWith(II, cast<Instruction>(Shuf));
}
return nullptr;
}
// TODO: Obvious missing transforms:
// * Single constant active lane -> store
// * Adjacent vector addresses -> masked.store
// * Narrow store width by halves excluding zero/undef lanes
// * Vector incrementing address -> vector masked store
Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
if (!ConstMask)
return nullptr;
// If the mask is all zeros, a scatter does nothing.
if (ConstMask->isNullValue())
return eraseInstFromFunction(II);
// Vector splat address -> scalar store
if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
// scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
StoreInst *S =
new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment);
S->copyMetadata(II);
return S;
}
// scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
// lastlane), ptr
if (ConstMask->isAllOnesValue()) {
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
ElementCount VF = WideLoadTy->getElementCount();
Constant *EC =
ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue());
Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC;
Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
Value *Extract =
Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
StoreInst *S =
new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
S->copyMetadata(II);
return S;
}
}
if (isa<ScalableVectorType>(ConstMask->getType()))
return nullptr;
// Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
APInt UndefElts(DemandedElts.getBitWidth(), 0);
if (Value *V =
SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
return replaceOperand(II, 0, V);
if (Value *V =
SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
return replaceOperand(II, 1, V);
return nullptr;
}
/// This function transforms launder.invariant.group and strip.invariant.group
/// like:
/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
/// launder(strip(%x)) -> launder(%x)
/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
/// strip(launder(%x)) -> strip(%x)
/// This is legal because it preserves the most recent information about
/// the presence or absence of invariant.group.
static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
InstCombinerImpl &IC) {
auto *Arg = II.getArgOperand(0);
auto *StrippedArg = Arg->stripPointerCasts();
auto *StrippedInvariantGroupsArg = StrippedArg;
while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
break;
StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
}
if (StrippedArg == StrippedInvariantGroupsArg)
return nullptr; // No launders/strips to remove.
Value *Result = nullptr;
if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
else
llvm_unreachable(
"simplifyInvariantGroupIntrinsic only handles launder and strip");
if (Result->getType()->getPointerAddressSpace() !=
II.getType()->getPointerAddressSpace())
Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
if (Result->getType() != II.getType())
Result = IC.Builder.CreateBitCast(Result, II.getType());
return cast<Instruction>(Result);
}
static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
assert((II.getIntrinsicID() == Intrinsic::cttz ||
II.getIntrinsicID() == Intrinsic::ctlz) &&
"Expected cttz or ctlz intrinsic");
bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Value *X;
// ctlz(bitreverse(x)) -> cttz(x)
// cttz(bitreverse(x)) -> ctlz(x)
if (match(Op0, m_BitReverse(m_Value(X)))) {
Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
return CallInst::Create(F, {X, II.getArgOperand(1)});
}
if (II.getType()->isIntOrIntVectorTy(1)) {
// ctlz/cttz i1 Op0 --> not Op0
if (match(Op1, m_Zero()))
return BinaryOperator::CreateNot(Op0);
// If zero is poison, then the input can be assumed to be "true", so the
// instruction simplifies to "false".
assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
}
// If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
if (auto *Sel = dyn_cast<SelectInst>(Op0))
if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
return R;
if (IsTZ) {
// cttz(-x) -> cttz(x)
if (match(Op0, m_Neg(m_Value(X))))
return IC.replaceOperand(II, 0, X);
// cttz(sext(x)) -> cttz(zext(x))
if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
auto *Zext = IC.Builder.CreateZExt(X, II.getType());
auto *CttzZext =
IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
return IC.replaceInstUsesWith(II, CttzZext);
}
// Zext doesn't change the number of trailing zeros, so narrow:
// cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
IC.Builder.getTrue());
auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
return IC.replaceInstUsesWith(II, ZextCttz);
}
// cttz(abs(x)) -> cttz(x)
// cttz(nabs(x)) -> cttz(x)
Value *Y;
SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
if (SPF == SPF_ABS || SPF == SPF_NABS)
return IC.replaceOperand(II, 0, X);
if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
return IC.replaceOperand(II, 0, X);
}
KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
// Create a mask for bits above (ctlz) or below (cttz) the first known one.
unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
: Known.countMaxLeadingZeros();
unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
: Known.countMinLeadingZeros();
// If all bits above (ctlz) or below (cttz) the first known one are known
// zero, this value is constant.
// FIXME: This should be in InstSimplify because we're replacing an
// instruction with a constant.
if (PossibleZeros == DefiniteZeros) {
auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
return IC.replaceInstUsesWith(II, C);
}
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsPoison' parameter to 'true'
// because we know the zero behavior can't affect the result.
if (!Known.One.isZero() ||
isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
&IC.getDominatorTree())) {
if (!match(II.getArgOperand(1), m_One()))
return IC.replaceOperand(II, 1, IC.Builder.getTrue());
}
// Add range metadata since known bits can't completely reflect what we know.
// TODO: Handle splat vectors.
auto *IT = dyn_cast<IntegerType>(Op0->getType());
if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
Metadata *LowAndHigh[] = {
ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
II.setMetadata(LLVMContext::MD_range,
MDNode::get(II.getContext(), LowAndHigh));
return &II;
}
return nullptr;
}
static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
assert(II.getIntrinsicID() == Intrinsic::ctpop &&
"Expected ctpop intrinsic");
Type *Ty = II.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Value *Op0 = II.getArgOperand(0);
Value *X, *Y;
// ctpop(bitreverse(x)) -> ctpop(x)
// ctpop(bswap(x)) -> ctpop(x)
if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
return IC.replaceOperand(II, 0, X);
// ctpop(rot(x)) -> ctpop(x)
if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
X == Y)
return IC.replaceOperand(II, 0, X);
// ctpop(x | -x) -> bitwidth - cttz(x, false)
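// (x | -x keeps the lowest set bit of x and sets every bit above it; e.g.
// for i8 x = 0b00010100, x | -x == 0b11111100 and ctpop == 6 == 8 - cttz(x).)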
if (Op0->hasOneUse() &&
match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
Function *F =
Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
}
// ctpop(~x & (x - 1)) -> cttz(x, false)
if (match(Op0,
m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
Function *F =
Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
return CallInst::Create(F, {X, IC.Builder.getFalse()});
}
// Zext doesn't change the number of set bits, so narrow:
// ctpop (zext X) --> zext (ctpop X)
if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
}
// If the operand is a select with constant arm(s), try to hoist ctpop.
if (auto *Sel = dyn_cast<SelectInst>(Op0))
if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
return R;
KnownBits Known(BitWidth);
IC.computeKnownBits(Op0, Known, 0, &II);
// If all bits are zero except for exactly one fixed bit, then the result
// must be 0 or 1, and we can get that answer by shifting to LSB:
// ctpop (X & 32) --> (X & 32) >> 5
if ((~Known.Zero).isPowerOf2())
return BinaryOperator::CreateLShr(
Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
// FIXME: Try to simplify vectors of integers.
auto *IT = dyn_cast<IntegerType>(Ty);
if (!IT)
return nullptr;
// Add range metadata since known bits can't completely reflect what we know.
unsigned MinCount = Known.countMinPopulation();
unsigned MaxCount = Known.countMaxPopulation();
if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
Metadata *LowAndHigh[] = {
ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
II.setMetadata(LLVMContext::MD_range,
MDNode::get(II.getContext(), LowAndHigh));
return &II;
}
return nullptr;
}
/// Convert a table lookup to shufflevector if the mask is constant.
/// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in which case
/// we could lower the shufflevector with a rev64 instruction, as it is
/// actually a byte reverse.
static Value *simplifyNeonTbl1(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
// Bail out if the mask is not a constant.
auto *C = dyn_cast<Constant>(II.getArgOperand(1));
if (!C)
return nullptr;
auto *VecTy = cast<FixedVectorType>(II.getType());
unsigned NumElts = VecTy->getNumElements();
// Only perform this transformation for <8 x i8> vector types.
if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
return nullptr;
int Indexes[8];
for (unsigned I = 0; I < NumElts; ++I) {
Constant *COp = C->getAggregateElement(I);
if (!COp || !isa<ConstantInt>(COp))
return nullptr;
Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
// Make sure the mask indices are in range.
if ((unsigned)Indexes[I] >= NumElts)
return nullptr;
}
auto *V1 = II.getArgOperand(0);
auto *V2 = Constant::getNullValue(V1->getType());
return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));
}
// Returns true iff the 2 intrinsics have the same operands, limiting the
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
unsigned NumOperands) {
assert(I.arg_size() >= NumOperands && "Not enough operands");
assert(E.arg_size() >= NumOperands && "Not enough operands");
for (unsigned i = 0; i < NumOperands; i++)
if (I.getArgOperand(i) != E.getArgOperand(i))
return false;
return true;
}
// Remove trivially empty start/end intrinsic ranges, i.e. a start
// immediately followed by an end (ignoring debuginfo or other
// start/end intrinsics in between). As this handles only the most trivial
// cases, tracking the nesting level is not needed:
//
// call @llvm.foo.start(i1 0)
// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
// call @llvm.foo.end(i1 0)
// call @llvm.foo.end(i1 0) ; &I
static bool
removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
std::function<bool(const IntrinsicInst &)> IsStart) {
// We start from the end intrinsic and scan backwards, so that InstCombine
// has already processed (and potentially removed) all the instructions
// before the end intrinsic.
BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
for (; BI != BE; ++BI) {
if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
if (I->isDebugOrPseudoInst() ||
I->getIntrinsicID() == EndI.getIntrinsicID())
continue;
if (IsStart(*I)) {
if (haveSameOperands(EndI, *I, EndI.arg_size())) {
IC.eraseInstFromFunction(*I);
IC.eraseInstFromFunction(EndI);
return true;
}
// Skip start intrinsics that don't pair with this end intrinsic.
continue;
}
}
break;
}
return false;
}
Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
return I.getIntrinsicID() == Intrinsic::vastart ||
I.getIntrinsicID() == Intrinsic::vacopy;
});
return nullptr;
}
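/// If the first argument of a call is a constant and the second is not, swap
/// them so that the constant canonically sits in the second position.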
static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
Call.setArgOperand(0, Arg1);
Call.setArgOperand(1, Arg0);
return &Call;
}
return nullptr;
}
/// Creates a result tuple for an overflow intrinsic \p II with a given
/// \p Result and a constant \p Overflow value.
static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
Constant *Overflow) {
Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
StructType *ST = cast<StructType>(II->getType());
Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Result, 0);
}
Instruction *
InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
WithOverflowInst *WO = cast<WithOverflowInst>(II);
Value *OperationResult = nullptr;
Constant *OverflowResult = nullptr;
if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
WO->getRHS(), *WO, OperationResult, OverflowResult))
return createOverflowTuple(WO, OperationResult, OverflowResult);
return nullptr;
}
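/// Determine the sign of \p Op if possible: returns true if it is known
/// negative, false if known non-negative, and None if unknown.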
static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
const DataLayout &DL, AssumptionCache *AC,
DominatorTree *DT) {
KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
if (Known.isNonNegative())
return false;
if (Known.isNegative())
return true;
Value *X, *Y;
if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);
return isImpliedByDomCondition(
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
Intrinsic::ID MinMaxID = II->getIntrinsicID();
assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
"Expected a min or max intrinsic");
// TODO: Match vectors with undef elements, but undef may not propagate.
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
Value *X;
const APInt *C0, *C1;
if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
!match(Op1, m_APInt(C1)))
return nullptr;
// Check for necessary no-wrap and overflow constraints.
bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
auto *Add = cast<BinaryOperator>(Op0);
if ((IsSigned && !Add->hasNoSignedWrap()) ||
(!IsSigned && !Add->hasNoUnsignedWrap()))
return nullptr;
// If the constant difference overflows, then instsimplify should reduce the
// min/max to the add or C1.
bool Overflow;
APInt CDiff =
IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
assert(!Overflow && "Expected simplify of min/max");
// min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
// Note: the "mismatched" no-overflow setting does not propagate.
Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
: BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
Type *Ty = MinMax1.getType();
// We are looking for a tree of:
// max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
// Where the min and max could be reversed
Instruction *MinMax2;
BinaryOperator *AddSub;
const APInt *MinValue, *MaxValue;
if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
return nullptr;
} else if (match(&MinMax1,
m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
return nullptr;
} else
return nullptr;
// Check that the constants clamp to a saturating range, and that the new
// type would be sensible to convert to.
if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
return nullptr;
// In what bitwidth can this be treated as saturating arithmetic?
unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
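// E.g. a clamp to [-128, 127] has MaxValue + 1 == 128, so NewBitWidth ==
// log2(128) + 1 == 8 and the operation saturates as an i8.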
// FIXME: This isn't quite right for vectors, but using the scalar type is a
// good first approximation for what should be done there.
if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
return nullptr;
// Also make sure that the inner min/max and the add/sub have one use.
if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
return nullptr;
// Create the new type (which can be a vector type)
Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
Intrinsic::ID IntrinsicID;
if (AddSub->getOpcode() == Instruction::Add)
IntrinsicID = Intrinsic::sadd_sat;
else if (AddSub->getOpcode() == Instruction::Sub)
IntrinsicID = Intrinsic::ssub_sat;
else
return nullptr;
// The two operands of the add/sub must be nsw-truncatable to the NewTy. This
// is usually achieved via a sext from a smaller type.
if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
NewBitWidth ||
ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
return nullptr;
// Finally create and return the sat intrinsic, truncated to the new type
Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
Value *Sat = Builder.CreateCall(F, {AT, BT});
return CastInst::Create(Instruction::SExt, Sat, Ty);
}
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X;
const APInt *C0, *C1;
if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
return nullptr;
CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
switch (II->getIntrinsicID()) {
case Intrinsic::smax:
if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
Pred = ICmpInst::ICMP_SGT;
break;
case Intrinsic::smin:
if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
Pred = ICmpInst::ICMP_SLT;
break;
case Intrinsic::umax:
if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
Pred = ICmpInst::ICMP_UGT;
break;
case Intrinsic::umin:
if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
Pred = ICmpInst::ICMP_ULT;
break;
default:
llvm_unreachable("Expected min/max intrinsic");
}
if (Pred == CmpInst::BAD_ICMP_PREDICATE)
return nullptr;
// max (min X, 42), 41 --> X > 41 ? 42 : 41
// min (max X, 42), 43 --> X < 43 ? 42 : 43
Value *Cmp = Builder.CreateICmp(Pred, X, I1);
return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
Intrinsic::ID MinMaxID = II->getIntrinsicID();
auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
if (!LHS || LHS->getIntrinsicID() != MinMaxID)
return nullptr;
Constant *C0, *C1;
if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
!match(II->getArgOperand(1), m_ImmConstant(C1)))
return nullptr;
// max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
Module *Mod = II->getModule();
Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
}
/// If this min/max has a matching min/max operand with a constant, try to push
/// the constant operand into this instruction. This can enable more folds.
static Instruction *
reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
// Match and capture a min/max operand candidate.
Value *X, *Y;
Constant *C;
Instruction *Inner;
if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
m_Instruction(Inner),
m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
m_Value(Y))))
return nullptr;
// The inner op must match. Check for constants to avoid infinite loops.
Intrinsic::ID MinMaxID = II->getIntrinsicID();
auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
match(X, m_ImmConstant()) || match(Y, m_ImmConstant()))
return nullptr;
// max (max X, C), Y --> max (max X, Y), C
Function *MinMax =
Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
NewInner->takeName(Inner);
return CallInst::Create(MinMax, {NewInner, C});
}
/// Reduce a sequence of min/max intrinsics with a common operand.
static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
// Match 3 of the same min/max ops. Example: umin(umin(), umin()).
auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
Intrinsic::ID MinMaxID = II->getIntrinsicID();
if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
RHS->getIntrinsicID() != MinMaxID ||
(!LHS->hasOneUse() && !RHS->hasOneUse()))
return nullptr;
Value *A = LHS->getArgOperand(0);
Value *B = LHS->getArgOperand(1);
Value *C = RHS->getArgOperand(0);
Value *D = RHS->getArgOperand(1);
// Look for a common operand.
Value *MinMaxOp = nullptr;
Value *ThirdOp = nullptr;
if (LHS->hasOneUse()) {
// If the LHS is only used in this chain and the RHS is used outside of it,
// reuse the RHS min/max because that will eliminate the LHS.
if (D == A || C == A) {
// min(min(a, b), min(c, a)) --> min(min(c, a), b)
// min(min(a, b), min(a, d)) --> min(min(a, d), b)
MinMaxOp = RHS;
ThirdOp = B;
} else if (D == B || C == B) {
// min(min(a, b), min(c, b)) --> min(min(c, b), a)
// min(min(a, b), min(b, d)) --> min(min(b, d), a)
MinMaxOp = RHS;
ThirdOp = A;
}
} else {
assert(RHS->hasOneUse() && "Expected one-use operand");
// Reuse the LHS. This will eliminate the RHS.
if (D == A || D == B) {
// min(min(a, b), min(c, a)) --> min(min(a, b), c)
// min(min(a, b), min(c, b)) --> min(min(a, b), c)
MinMaxOp = LHS;
ThirdOp = C;
} else if (C == A || C == B) {
// min(min(a, b), min(b, d)) --> min(min(a, b), d)
// min(min(a, b), min(a, d)) --> min(min(a, b), d)
MinMaxOp = LHS;
ThirdOp = D;
}
}
if (!MinMaxOp || !ThirdOp)
return nullptr;
Module *Mod = II->getModule();
Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
}
/// If all arguments of the intrinsic are unary shuffles with the same mask,
/// try to shuffle after the intrinsic.
static Instruction *
foldShuffledIntrinsicOperands(IntrinsicInst *II,
InstCombiner::BuilderTy &Builder) {
// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
// etc. Use llvm::isTriviallyVectorizable() and related to determine
// which intrinsics are safe to shuffle?
switch (II->getIntrinsicID()) {
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin:
case Intrinsic::fma:
case Intrinsic::fshl:
case Intrinsic::fshr:
break;
default:
return nullptr;
}
Value *X;
ArrayRef<int> Mask;
if (!match(II->getArgOperand(0),
m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
return nullptr;
// At least 1 operand must have 1 use because we are creating 2 instructions.
if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
return nullptr;
// See if all arguments are shuffled with the same mask.
SmallVector<Value *, 4> NewArgs(II->arg_size());
NewArgs[0] = X;
Type *SrcTy = X->getType();
for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
if (!match(II->getArgOperand(i),
m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
X->getType() != SrcTy)
return nullptr;
NewArgs[i] = X;
}
// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
Value *NewIntrinsic =
Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
return new ShuffleVectorInst(NewIntrinsic, Mask);
}
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. Doing so would not accomplish
// anything useful, but would cause the folds below to be skipped.
if (!CI.use_empty())
if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
// If the caller function (i.e. us, the function that contains this CallInst)
// is nounwind, mark the call as nounwind, even if the callee isn't.
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
CI.setDoesNotThrow();
return &CI;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallBase(CI);
// For atomic unordered mem intrinsics, if the length is negative or not a
// multiple of the element size, then the behavior is undefined.
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
if (NumBytes->getSExtValue() < 0 ||
(NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
CreateNonTerminatorUnreachable(AMI);
assert(AMI->getType()->isVoidTy() &&
"non void atomic unordered mem intrinsic");
return eraseInstFromFunction(*AMI);
}
// Intrinsics cannot occur in an invoke or a callbr, so handle them here
// instead of in visitCallBase.
if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
bool Changed = false;
// memmove/cpy/set of zero bytes is a noop.
if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
if (NumBytes->isNullValue())
return eraseInstFromFunction(CI);
}
// No other transformations apply to volatile transfers.
if (auto *M = dyn_cast<MemIntrinsic>(MI))
if (M->isVolatile())
return nullptr;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
// into a call to memcpy.
if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
if (GVSrc->isConstant()) {
Module *M = CI.getModule();
Intrinsic::ID MemCpyID =
isa<AtomicMemMoveInst>(MMI)
? Intrinsic::memcpy_element_unordered_atomic
: Intrinsic::memcpy;
Type *Tys[3] = { CI.getArgOperand(0)->getType(),
CI.getArgOperand(1)->getType(),
CI.getArgOperand(2)->getType() };
CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
Changed = true;
}
}
if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
// memmove(x,x,size) -> noop.
if (MTI->getSource() == MTI->getDest())
return eraseInstFromFunction(CI);
}
// If we can determine a pointer alignment that is bigger than currently
// set, update the alignment.
if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
if (Instruction *I = SimplifyAnyMemTransfer(MTI))
return I;
} else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
if (Instruction *I = SimplifyAnyMemSet(MSI))
return I;
}
if (Changed) return II;
}
// For fixed width vector result intrinsics, use the generic demanded vector
// support.
if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
auto VWidth = IIFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
if (V != II)
return replaceInstUsesWith(*II, V);
return II;
}
}
if (II->isCommutative()) {
if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
return NewCall;
}
// Unused constrained FP intrinsic calls may have a declared side effect,
// which prevents them from being removed. In some cases, however, the side
// effect is actually absent. To detect this case, call
// simplifyConstrainedFPCall. If it returns a replacement, the call may be
// removed.
if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
return eraseInstFromFunction(CI);
}
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::objectsize:
if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
return replaceInstUsesWith(CI, V);
return nullptr;
case Intrinsic::abs: {
Value *IIOperand = II->getArgOperand(0);
bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
// abs(-x) -> abs(x)
// TODO: Copy nsw if it was present on the neg?
Value *X;
if (match(IIOperand, m_Neg(m_Value(X))))
return replaceOperand(*II, 0, X);
if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
return replaceOperand(*II, 0, X);
if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
return replaceOperand(*II, 0, X);
if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
// abs(x) -> x if x >= 0
if (!*Sign)
return replaceInstUsesWith(*II, IIOperand);
// abs(x) -> -x if x < 0
if (IntMinIsPoison)
return BinaryOperator::CreateNSWNeg(IIOperand);
return BinaryOperator::CreateNeg(IIOperand);
}
// abs (sext X) --> zext (abs X*)
// Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
Value *NarrowAbs =
Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
}
// Match a complicated way to check if a number is odd/even:
// abs (srem X, 2) --> and X, 1
const APInt *C;
if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
break;
}
case Intrinsic::umin: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
// umin(x, 1) == zext(x != 0)
if (match(I1, m_One())) {
Value *Zero = Constant::getNullValue(I0->getType());
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
}
LLVM_FALLTHROUGH;
}
case Intrinsic::umax: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X, *Y;
if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
Constant *C;
if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
I0->hasOneUse()) {
Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
}
}
// If both operands of unsigned min/max are sign-extended, it is still ok
// to narrow the operation.
LLVM_FALLTHROUGH;
}
case Intrinsic::smax:
case Intrinsic::smin: {
Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
Value *X, *Y;
if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
}
Constant *C;
if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
I0->hasOneUse()) {
Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
}
}
if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
// smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
// smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
// TODO: Canonicalize neg after min/max if I1 is constant.
if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse())) {
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
return BinaryOperator::CreateNSWNeg(InvMaxMin);
}
}
// If we can eliminate ~A and Y is free to invert:
// max ~A, Y --> ~(min A, ~Y)
//
// Examples:
// max ~A, ~Y --> ~(min A, Y)
// max ~A, C --> ~(min A, ~C)
// max ~A, (max ~Y, ~Z) --> ~(min A, (min Y, Z))
auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
Value *A;
if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
!isFreeToInvert(A, A->hasOneUse()) &&
isFreeToInvert(Y, Y->hasOneUse())) {
Value *NotY = Builder.CreateNot(Y);
Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
return BinaryOperator::CreateNot(InvMaxMin);
}
return nullptr;
};
if (Instruction *I = moveNotAfterMinMax(I0, I1))
return I;
if (Instruction *I = moveNotAfterMinMax(I1, I0))
return I;
if (Instruction *I = moveAddAfterMinMax(II, Builder))
return I;
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
// umax(X, -X) --> -abs(X)
// umin(X, -X) --> abs(X)
if (isKnownNegation(I0, I1)) {
// We can choose either operand as the input to abs(), but if we can
// eliminate the only use of a value, that's better for subsequent
// transforms/analysis.
if (I0->hasOneUse() && !I1->hasOneUse())
std::swap(I0, I1);
// This is some variant of abs(). See if we can propagate 'nsw' to the abs
// operation and potentially its negation.
bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
Value *Abs = Builder.CreateBinaryIntrinsic(
Intrinsic::abs, I0,
ConstantInt::getBool(II->getContext(), IntMinIsPoison));
// We don't have a "nabs" intrinsic, so negate if needed based on the
// max/min operation.
if (IID == Intrinsic::smin || IID == Intrinsic::umax)
Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
return replaceInstUsesWith(CI, Abs);
}
if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
return Sel;
if (Instruction *SAdd = matchSAddSubSat(*II))
return SAdd;
if (match(I1, m_ImmConstant()))
if (auto *Sel = dyn_cast<SelectInst>(I0))
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
return NewMinMax;
if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
return R;
if (Instruction *NewMinMax = factorizeMinMaxTree(II))
return NewMinMax;
break;
}
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
// Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
// inverse-shift-of-bswap:
// bswap (shl X, Y) --> lshr (bswap X), Y
// bswap (lshr X, Y) --> shl (bswap X), Y
Value *X, *Y;
if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
// The transform allows undef vector elements, so try a constant match
// first. If knownbits can handle that case, that clause could be removed.
unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
const APInt *C;
if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
BinaryOperator::BinaryOps InverseShift =
cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
? Instruction::LShr
: Instruction::Shl;
return BinaryOperator::Create(InverseShift, NewSwap, Y);
}
}
KnownBits Known = computeKnownBits(IIOperand, 0, II);
uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
unsigned BW = Known.getBitWidth();
// bswap(x) -> shift(x) if x has exactly one "active byte"
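// E.g. for i32 x with only bits 8..15 possibly set (LZ == 16, TZ == 8),
// bswap(x) == shl(x, 8).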
if (BW - LZ - TZ == 8) {
assert(LZ != TZ && "active byte cannot be in the middle");
if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
return BinaryOperator::CreateNUWShl(
IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
// -> lshr(x) if the "active byte" is in the high part of x
return BinaryOperator::CreateExactLShr(
IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
}
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
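// e.g. (illustrative, i32 -> i16): for %x = 0xAABBCCDD,
// trunc(bswap %x) = trunc 0xDDCCBBAA = 0xBBAA, so the outer bswap yields
// 0xAABB, matching trunc(lshr(%x, 16)) with c = 32 - 16 = 16.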
if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
unsigned C = X->getType()->getScalarSizeInBits() - BW;
Value *CV = ConstantInt::get(X->getType(), C);
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
}
break;
}
case Intrinsic::masked_load:
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
return replaceInstUsesWith(CI, SimplifiedMaskedOp);
break;
case Intrinsic::masked_store:
return simplifyMaskedStore(*II);
case Intrinsic::masked_gather:
return simplifyMaskedGather(*II);
case Intrinsic::masked_scatter:
return simplifyMaskedScatter(*II);
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
return replaceInstUsesWith(*II, SkippedBarrier);
break;
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// 0 and 1 are handled in instsimplify
// powi(x, -1) -> 1/x
if (Power->isMinusOne())
return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
II->getArgOperand(0), II);
// powi(x, 2) -> x*x
if (Power->equalsInt(2))
return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
II->getArgOperand(0), II);
if (!Power->getValue()[0]) {
Value *X;
// If power is even:
// powi(-x, p) -> powi(x, p)
// powi(fabs(x), p) -> powi(x, p)
// powi(copysign(x, y), p) -> powi(x, p)
if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
match(II->getArgOperand(0),
m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
return replaceOperand(*II, 0, X);
}
}
break;
case Intrinsic::cttz:
case Intrinsic::ctlz:
if (auto *I = foldCttzCtlz(*II, *this))
return I;
break;
case Intrinsic::ctpop:
if (auto *I = foldCtpop(*II, *this))
return I;
break;
case Intrinsic::fshl:
case Intrinsic::fshr: {
Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
Type *Ty = II->getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
Constant *ShAmtC;
if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
// Canonicalize a shift amount constant operand to modulo the bit-width.
Constant *WidthC = ConstantInt::get(Ty, BitWidth);
Constant *ModuloC =
ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
if (!ModuloC)
return nullptr;
if (ModuloC != ShAmtC)
return replaceOperand(*II, 2, ModuloC);
assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
"Shift amount expected to be modulo bitwidth");
// Canonicalize funnel shift right by constant to funnel shift left. This
// is not entirely arbitrary. For historical reasons, the backend may
// recognize rotate left patterns but miss rotate right patterns.
if (IID == Intrinsic::fshr) {
// fshr X, Y, C --> fshl X, Y, (BitWidth - C)
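// e.g. (illustrative, i8): fshr %x, %y, 3 --> fshl %x, %y, 5, since both
// compute (%x << 5) | (%y >> 3).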
Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
Module *Mod = II->getModule();
Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
}
assert(IID == Intrinsic::fshl &&
"All funnel shifts by simple constants should go left");
// fshl(X, 0, C) --> shl X, C
// fshl(X, undef, C) --> shl X, C
if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
return BinaryOperator::CreateShl(Op0, ShAmtC);
// fshl(0, X, C) --> lshr X, (BW-C)
// fshl(undef, X, C) --> lshr X, (BW-C)
if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
return BinaryOperator::CreateLShr(Op1,
ConstantExpr::getSub(WidthC, ShAmtC));
// fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
Module *Mod = II->getModule();
Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
return CallInst::Create(Bswap, { Op0 });
}
}
// Left or right might be masked.
if (SimplifyDemandedInstructionBits(*II))
return &CI;
// The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
// so only the low bits of the shift amount are demanded if the bitwidth is
// a power-of-2.
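// e.g. (illustrative): for i32 the amount is taken modulo 32, so only the
// low Log2(32) = 5 bits of operand 2 are demanded.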
if (!isPowerOf2_32(BitWidth))
break;
APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
KnownBits Op2Known(BitWidth);
if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
return &CI;
break;
}
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow: {
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
// Given 2 constant operands whose sum does not overflow:
// uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
// saddo (X +nsw C0), C1 -> saddo X, C0 + C1
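// e.g. (illustrative): uaddo (%x +nuw 3), 5 --> uaddo %x, 8,
// valid because 3 + 5 does not overflow.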
Value *X;
const APInt *C0, *C1;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
bool IsSigned = IID == Intrinsic::sadd_with_overflow;
bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
: match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
if (HasNWAdd && match(Arg1, m_APInt(C1))) {
bool Overflow;
APInt NewC =
IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
if (!Overflow)
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(
IID, X, ConstantInt::get(Arg1->getType(), NewC)));
}
break;
}
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
case Intrinsic::usub_with_overflow:
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
break;
case Intrinsic::ssub_with_overflow: {
if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
return I;
Constant *C;
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
// Given a constant C that is not the minimum signed value
// for an integer of a given bit width:
//
// ssubo X, C -> saddo X, -C
if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
Value *NegVal = ConstantExpr::getNeg(C);
// Build a saddo call that is equivalent to the discovered
// ssubo call.
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
Arg0, NegVal));
}
break;
}
case Intrinsic::uadd_sat:
case Intrinsic::sadd_sat:
case Intrinsic::usub_sat:
case Intrinsic::ssub_sat: {
SaturatingInst *SI = cast<SaturatingInst>(II);
Type *Ty = SI->getType();
Value *Arg0 = SI->getLHS();
Value *Arg1 = SI->getRHS();
// Make use of known overflow information.
OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
Arg0, Arg1, SI);
switch (OR) {
case OverflowResult::MayOverflow:
break;
case OverflowResult::NeverOverflows:
if (SI->isSigned())
return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
else
return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
case OverflowResult::AlwaysOverflowsLow: {
unsigned BitWidth = Ty->getScalarSizeInBits();
APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
}
case OverflowResult::AlwaysOverflowsHigh: {
unsigned BitWidth = Ty->getScalarSizeInBits();
APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
}
}
// ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
Constant *C;
if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
C->isNotMinSignedValue()) {
Value *NegVal = ConstantExpr::getNeg(C);
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(
Intrinsic::sadd_sat, Arg0, NegVal));
}
// sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
// sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
// if Val and Val2 have the same sign
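// e.g. (illustrative): uadd.sat(uadd.sat(%x, 10), 20) --> uadd.sat(%x, 30);
// for the signed case the two constants must not overflow when summed.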
if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
Value *X;
const APInt *Val, *Val2;
APInt NewVal;
bool IsUnsigned =
IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
if (Other->getIntrinsicID() == IID &&
match(Arg1, m_APInt(Val)) &&
match(Other->getArgOperand(0), m_Value(X)) &&
match(Other->getArgOperand(1), m_APInt(Val2))) {
if (IsUnsigned)
NewVal = Val->uadd_sat(*Val2);
else if (Val->isNonNegative() == Val2->isNonNegative()) {
bool Overflow;
NewVal = Val->sadd_ov(*Val2, Overflow);
if (Overflow) {
// Both adds together may add more than SignedMaxValue
// without saturating the final result.
break;
}
} else {
// Cannot fold saturated addition with different signs.
break;
}
return replaceInstUsesWith(
*II, Builder.CreateBinaryIntrinsic(
IID, X, ConstantInt::get(II->getType(), NewVal)));
}
}
break;
}
case Intrinsic::minnum:
case Intrinsic::maxnum:
case Intrinsic::minimum:
case Intrinsic::maximum: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
Value *X, *Y;
if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
(Arg0->hasOneUse() || Arg1->hasOneUse())) {
// If both operands are negated, invert the call and negate the result:
// min(-X, -Y) --> -(max(X, Y))
// max(-X, -Y) --> -(min(X, Y))
Intrinsic::ID NewIID;
switch (IID) {
case Intrinsic::maxnum:
NewIID = Intrinsic::minnum;
break;
case Intrinsic::minnum:
NewIID = Intrinsic::maxnum;
break;
case Intrinsic::maximum:
NewIID = Intrinsic::minimum;
break;
case Intrinsic::minimum:
NewIID = Intrinsic::maximum;
break;
default:
llvm_unreachable("unexpected intrinsic ID");
}
Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
FNeg->copyIRFlags(II);
return FNeg;
}
// m(m(X, C2), C1) -> m(X, C)
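// e.g. (illustrative): maxnum(maxnum(%x, 2.0), 1.0) --> maxnum(%x, 2.0),
// folding the constants with maxnum(2.0, 1.0) = 2.0.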
const APFloat *C1, *C2;
if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
((match(M->getArgOperand(0), m_Value(X)) &&
match(M->getArgOperand(1), m_APFloat(C2))) ||
(match(M->getArgOperand(1), m_Value(X)) &&
match(M->getArgOperand(0), m_APFloat(C2))))) {
APFloat Res(0.0);
switch (IID) {
case Intrinsic::maxnum:
Res = maxnum(*C1, *C2);
break;
case Intrinsic::minnum:
Res = minnum(*C1, *C2);
break;
case Intrinsic::maximum:
Res = maximum(*C1, *C2);
break;
case Intrinsic::minimum:
Res = minimum(*C1, *C2);
break;
default:
llvm_unreachable("unexpected intrinsic ID");
}
Instruction *NewCall = Builder.CreateBinaryIntrinsic(
IID, X, ConstantFP::get(Arg0->getType(), Res), II);
// TODO: Conservatively intersecting FMF. If Res == C2, the transform
// was a simplification (so Arg0 and its original flags could
// propagate?)
NewCall->andIRFlags(M);
return replaceInstUsesWith(*II, NewCall);
}
}
// m((fpext X), (fpext Y)) -> fpext (m(X, Y))
if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
X->getType() == Y->getType()) {
Value *NewCall =
Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
return new FPExtInst(NewCall, II->getType());
}
// max X, -X --> fabs X
// min X, -X --> -(fabs X)
// TODO: Remove one-use limitation? That is obviously better for max.
// It would be an extra instruction for min (fnabs), but that is
// still likely better for analysis and codegen.
if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
(match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
R = Builder.CreateFNegFMF(R, II);
return replaceInstUsesWith(*II, R);
}
break;
}
case Intrinsic::fmuladd: {
// Canonicalize fast fmuladd to the separate fmul + fadd.
if (II->isFast()) {
BuilderTy::FastMathFlagGuard Guard(Builder);
Builder.setFastMathFlags(II->getFastMathFlags());
Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
II->getArgOperand(1));
Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
Add->takeName(II);
return replaceInstUsesWith(*II, Add);
}
// Try to simplify the underlying FMul.
if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
FAdd->copyFastMathFlags(II);
return FAdd;
}
LLVM_FALLTHROUGH;
}
case Intrinsic::fma: {
// fma fneg(x), fneg(y), z -> fma x, y, z
Value *Src0 = II->getArgOperand(0);
Value *Src1 = II->getArgOperand(1);
Value *X, *Y;
if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
replaceOperand(*II, 0, X);
replaceOperand(*II, 1, Y);
return II;
}
// fma fabs(x), fabs(x), z -> fma x, x, z
if (match(Src0, m_FAbs(m_Value(X))) &&
match(Src1, m_FAbs(m_Specific(X)))) {
replaceOperand(*II, 0, X);
replaceOperand(*II, 1, X);
return II;
}
// Try to simplify the underlying FMul. We can only apply simplifications
// that do not require rounding.
if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
II->getFastMathFlags(),
SQ.getWithInstruction(II))) {
auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
FAdd->copyFastMathFlags(II);
return FAdd;
}
// fma x, y, 0 -> fmul x, y
// This is always valid for -0.0, but requires nsz for +0.0 as
// -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
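// e.g. (illustrative): with %x = -0.0, %y = 1.0, fma(%x, %y, +0.0) rounds
// -0.0 + 0.0 to +0.0, while the bare fmul yields -0.0; with a -0.0 addend
// the results always agree, since v + -0.0 == v for every v.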
if (match(II->getArgOperand(2), m_NegZeroFP()) ||
(match(II->getArgOperand(2), m_PosZeroFP()) &&
II->getFastMathFlags().noSignedZeros()))
return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
break;
}
case Intrinsic::copysign: {
Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
if (SignBitMustBeZero(Sign, &TLI)) {
// If we know that the sign argument is positive, reduce to FABS:
// copysign Mag, +Sign --> fabs Mag
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Fabs);
}
// TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
const APFloat *C;
if (match(Sign, m_APFloat(C)) && C->isNegative()) {
// If we know that the sign argument is negative, reduce to FNABS:
// copysign Mag, -Sign --> fneg (fabs Mag)
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
}
// Propagate sign argument through nested calls:
// copysign Mag, (copysign ?, X) --> copysign Mag, X
Value *X;
if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
return replaceOperand(*II, 1, X);
// Peek through changes of magnitude's sign-bit. This call rewrites those:
// copysign (fabs X), Sign --> copysign X, Sign
// copysign (fneg X), Sign --> copysign X, Sign
if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
return replaceOperand(*II, 0, X);
break;
}
case Intrinsic::fabs: {
Value *Cond, *TVal, *FVal;
if (match(II->getArgOperand(0),
m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
// fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
return SelectInst::Create(Cond, AbsT, AbsF);
}
// fabs (select Cond, -FVal, FVal) --> fabs FVal
if (match(TVal, m_FNeg(m_Specific(FVal))))
return replaceOperand(*II, 0, FVal);
// fabs (select Cond, TVal, -TVal) --> fabs TVal
if (match(FVal, m_FNeg(m_Specific(TVal))))
return replaceOperand(*II, 0, TVal);
}
LLVM_FALLTHROUGH;
}
case Intrinsic::ceil:
case Intrinsic::floor:
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::nearbyint:
case Intrinsic::rint:
case Intrinsic::trunc: {
Value *ExtSrc;
if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
// Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
return new FPExtInst(NarrowII, II->getType());
}
break;
}
case Intrinsic::cos:
case Intrinsic::amdgcn_cos: {
Value *X;
Value *Src = II->getArgOperand(0);
if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
// cos(-x) -> cos(x)
// cos(fabs(x)) -> cos(x)
return replaceOperand(*II, 0, X);
}
break;
}
case Intrinsic::sin: {
Value *X;
if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
// sin(-x) --> -sin(x)
Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
FNeg->copyFastMathFlags(II);
return FNeg;
}
break;
}
case Intrinsic::arm_neon_vtbl1:
case Intrinsic::aarch64_neon_tbl1:
if (Value *V = simplifyNeonTbl1(*II, Builder))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu:
case Intrinsic::aarch64_neon_smull:
case Intrinsic::aarch64_neon_umull: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
// Handle mul by zero first:
if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
}
// Check for constant LHS & RHS - in this case we just simplify.
bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
IID == Intrinsic::aarch64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
}
// Couldn't simplify - canonicalize constant to the RHS.
std::swap(Arg0, Arg1);
}
// Handle mul by one:
if (Constant *CV1 = dyn_cast<Constant>(Arg1))
if (ConstantInt *Splat =
dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
if (Splat->isOne())
return CastInst::CreateIntegerCast(Arg0, II->getType(),
/*isSigned=*/!Zext);
break;
}
case Intrinsic::arm_neon_aesd:
case Intrinsic::arm_neon_aese:
case Intrinsic::aarch64_crypto_aesd:
case Intrinsic::aarch64_crypto_aese: {
Value *DataArg = II->getArgOperand(0);
Value *KeyArg = II->getArgOperand(1);
// Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
Value *Data, *Key;
if (match(KeyArg, m_ZeroInt()) &&
match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
replaceOperand(*II, 0, Data);
replaceOperand(*II, 1, Key);
return II;
}
break;
}
case Intrinsic::hexagon_V6_vandvrt:
case Intrinsic::hexagon_V6_vandvrt_128B: {
// Simplify Q -> V -> Q conversion.
if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
Intrinsic::ID ID0 = Op0->getIntrinsicID();
if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
break;
Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
// Check if every byte has common bits in Bytes and Mask.
uint64_t C = Bytes1 & Mask1;
if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
return replaceInstUsesWith(*II, Op0->getArgOperand(0));
}
break;
}
case Intrinsic::stackrestore: {
enum class ClassifyResult {
None,
Alloca,
StackRestore,
CallWithSideEffects,
};
auto Classify = [](const Instruction *I) {
if (isa<AllocaInst>(I))
return ClassifyResult::Alloca;
if (auto *CI = dyn_cast<CallInst>(I)) {
if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
if (II->getIntrinsicID() == Intrinsic::stackrestore)
return ClassifyResult::StackRestore;
if (II->mayHaveSideEffects())
return ClassifyResult::CallWithSideEffects;
} else {
// Consider all non-intrinsic calls to be side effects
return ClassifyResult::CallWithSideEffects;
}
}
return ClassifyResult::None;
};
// If the stacksave and the stackrestore are in the same BB, and there is
// no intervening call, alloca, or stackrestore of a different stacksave,
// remove the restore. This can happen when variable allocas are DCE'd.
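// e.g. (illustrative IR, assumed shape):
// %sp = call i8* @llvm.stacksave()
// ; a variable-length alloca that used to live here was removed
// call void @llvm.stackrestore(i8* %sp) ; now removable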
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave &&
SS->getParent() == II->getParent()) {
BasicBlock::iterator BI(SS);
bool CannotRemove = false;
for (++BI; &*BI != II; ++BI) {
switch (Classify(&*BI)) {
case ClassifyResult::None:
// So far so good, look at next instructions.
break;
case ClassifyResult::StackRestore:
// If we found an intervening stackrestore for a different
// stacksave, we can't remove the stackrestore. Otherwise, continue.
if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
CannotRemove = true;
break;
case ClassifyResult::Alloca:
case ClassifyResult::CallWithSideEffects:
// If we found an alloca, a non-intrinsic call, or an intrinsic
// call with side effects, we can't remove the stackrestore.
CannotRemove = true;
break;
}
if (CannotRemove)
break;
}
if (!CannotRemove)
return eraseInstFromFunction(CI);
}
}
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI(II);
Instruction *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
switch (Classify(&*BI)) {
case ClassifyResult::None:
// So far so good, look at next instructions.
break;
case ClassifyResult::StackRestore:
// If there is a stackrestore below this one, remove this one.
return eraseInstFromFunction(CI);
case ClassifyResult::Alloca:
case ClassifyResult::CallWithSideEffects:
// If we found an alloca, a non-intrinsic call, or an intrinsic call
// with side effects (such as llvm.stacksave and llvm.read_register),
// we can't remove the stack restore.
CannotRemove = true;
break;
}
if (CannotRemove)
break;
}
// If the stack restore is in a return, resume, or unwind block and if there
// are no allocas or calls between the restore and the return, nuke the
// restore.
if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
return eraseInstFromFunction(CI);
break;
}
case Intrinsic::lifetime_end:
// Asan needs to poison memory to detect invalid access, which is possible
// even for an empty lifetime range.
if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
break;
if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
return I.getIntrinsicID() == Intrinsic::lifetime_start;
}))
return nullptr;
break;
case Intrinsic::assume: {
Value *IIOperand = II->getArgOperand(0);
SmallVector<OperandBundleDef, 4> OpBundles;
II->getOperandBundlesAsDefs(OpBundles);
/// This will remove the boolean Condition from the assume given as
/// argument and remove the assume if it becomes useless.
/// It always returns nullptr so it can be used as a return value.
auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
assert(isa<AssumeInst>(Assume));
if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
return eraseInstFromFunction(CI);
replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
return nullptr;
};
// Remove an assume if it is followed by an identical assume.
// TODO: Do we need this? Unless there are conflicting assumptions, the
// computeKnownBits(IIOperand) below here eliminates redundant assumes.
Instruction *Next = II->getNextNonDebugInstruction();
if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
return RemoveConditionFromAssume(Next);
// Canonicalize assume(a && b) -> assume(a); assume(b);
// Note: New assumption intrinsics created here are registered by
// the InstCombineIRInserter object.
FunctionType *AssumeIntrinsicTy = II->getFunctionType();
Value *AssumeIntrinsic = II->getCalledOperand();
Value *A, *B;
if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
II->getName());
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
return eraseInstFromFunction(*II);
}
// assume(!(a || b)) -> assume(!a); assume(!b);
if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
Builder.CreateNot(A), OpBundles, II->getName());
Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
Builder.CreateNot(B), II->getName());
return eraseInstFromFunction(*II);
}
// assume( (load addr) != null ) -> add 'nonnull' metadata to load
// (if assume is valid at the load)
CmpInst::Predicate Pred;
Instruction *LHS;
if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
LHS->getType()->isPointerTy() &&
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
return RemoveConditionFromAssume(II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
// Convert nonnull assume like:
// %A = icmp ne i32* %PTR, null
// call void @llvm.assume(i1 %A)
// into
// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
if (EnableKnowledgeRetention &&
match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
if (auto *Replacement = buildAssumeFromKnowledge(
{RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
Replacement->insertBefore(Next);
AC.registerAssumption(Replacement);
return RemoveConditionFromAssume(II);
}
}
// Convert alignment assume like:
// %B = ptrtoint i32* %A to i64
// %C = and i64 %B, Constant
// %D = icmp eq i64 %C, 0
// call void @llvm.assume(i1 %D)
// into
// call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
uint64_t AlignMask;
if (EnableKnowledgeRetention &&
match(IIOperand,
m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
m_Zero())) &&
Pred == CmpInst::ICMP_EQ) {
if (isPowerOf2_64(AlignMask + 1)) {
uint64_t Offset = 0;
match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
if (match(A, m_PtrToInt(m_Value(A)))) {
/// Note: this doesn't preserve the offset information but merges
/// offset and alignment.
/// TODO: we can generate a GEP instead of merging the alignment with
/// the offset.
RetainedKnowledge RK{Attribute::Alignment,
(unsigned)MinAlign(Offset, AlignMask + 1), A};
if (auto *Replacement =
buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
Replacement->insertAfter(II);
AC.registerAssumption(Replacement);
}
return RemoveConditionFromAssume(II);
}
}
}
/// Canonicalize Knowledge in operand bundles.
if (EnableKnowledgeRetention && II->hasOperandBundles()) {
for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
auto &BOI = II->bundle_op_info_begin()[Idx];
RetainedKnowledge RK =
llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
if (BOI.End - BOI.Begin > 2)
continue; // Prevent reducing knowledge in an align with offset since
// extracting a RetainedKnowledge from them loses offset
// information
RetainedKnowledge CanonRK =
llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
&getAssumptionCache(),
&getDominatorTree());
if (CanonRK == RK)
continue;
if (!CanonRK) {
if (BOI.End - BOI.Begin > 0) {
Worklist.pushValue(II->op_begin()[BOI.Begin]);
Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
}
continue;
}
assert(RK.AttrKind == CanonRK.AttrKind);
if (BOI.End - BOI.Begin > 0)
II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
if (BOI.End - BOI.Begin > 1)
II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
if (RK.WasOn)
Worklist.pushValue(RK.WasOn);
return II;
}
}
// If there is a dominating assume with the same condition as this one,
// then this one is redundant, and should be removed.
KnownBits Known(1);
computeKnownBits(IIOperand, Known, 0, II);
if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
return eraseInstFromFunction(*II);
// Update the cache of affected values for this assumption (we might be
// here because we just simplified the condition).
AC.updateAffectedValues(cast<AssumeInst>(II));
break;
}
case Intrinsic::experimental_guard: {
// Is this guard followed by another guard? We scan forward over a small
// fixed window of instructions to handle common cases with conditions
// computed between guards.
Instruction *NextInst = II->getNextNonDebugInstruction();
for (unsigned i = 0; i < GuardWideningWindow; i++) {
// Note: Using context-free form to avoid compile time blow up
if (!isSafeToSpeculativelyExecute(NextInst))
break;
NextInst = NextInst->getNextNonDebugInstruction();
}
Value *NextCond = nullptr;
if (match(NextInst,
m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
Value *CurrCond = II->getArgOperand(0);
// Remove a guard that is immediately preceded by an identical guard.
// Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
if (CurrCond != NextCond) {
Instruction *MoveI = II->getNextNonDebugInstruction();
while (MoveI != NextInst) {
auto *Temp = MoveI;
MoveI = MoveI->getNextNonDebugInstruction();
Temp->moveBefore(II);
}
replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
}
eraseInstFromFunction(*NextInst);
return II;
}
break;
}
case Intrinsic::vector_insert: {
Value *Vec = II->getArgOperand(0);
Value *SubVec = II->getArgOperand(1);
Value *Idx = II->getArgOperand(2);
auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
// Only canonicalize if the destination vector, Vec, and SubVec are all
// fixed vectors.
if (DstTy && VecTy && SubVecTy) {
unsigned DstNumElts = DstTy->getNumElements();
unsigned VecNumElts = VecTy->getNumElements();
unsigned SubVecNumElts = SubVecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
// An insert that entirely overwrites Vec with SubVec is a nop.
if (VecNumElts == SubVecNumElts)
return replaceInstUsesWith(CI, SubVec);
// Widen SubVec into a vector of the same width as Vec, since
// shufflevector requires the two input vectors to be the same width.
// Elements beyond the bounds of SubVec within the widened vector are
// undefined.
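// e.g. (illustrative): inserting <2 x i32> %sub into <4 x i32> %vec at
// index 2 widens %sub with mask <0, 1, undef, undef> and then blends the
// two vectors with mask <0, 1, 4, 5>.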
SmallVector<int, 8> WidenMask;
unsigned i;
for (i = 0; i != SubVecNumElts; ++i)
WidenMask.push_back(i);
for (; i != VecNumElts; ++i)
WidenMask.push_back(UndefMaskElem);
Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != IdxN; ++i)
Mask.push_back(i);
for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
Mask.push_back(i);
for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
Mask.push_back(i);
Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
return replaceInstUsesWith(CI, Shuffle);
}
break;
}
case Intrinsic::vector_extract: {
Value *Vec = II->getArgOperand(0);
Value *Idx = II->getArgOperand(1);
auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
// Only canonicalize if the destination vector and Vec are fixed
// vectors.
if (DstTy && VecTy) {
unsigned DstNumElts = DstTy->getNumElements();
unsigned VecNumElts = VecTy->getNumElements();
unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
// Extracting the entirety of Vec is a nop.
if (VecNumElts == DstNumElts) {
replaceInstUsesWith(CI, Vec);
return eraseInstFromFunction(CI);
}
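// e.g. (illustrative): extracting <2 x i32> at index 2 from <4 x i32>
// becomes a shufflevector of Vec with mask <2, 3>.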
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != DstNumElts; ++i)
Mask.push_back(IdxN + i);
Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
return replaceInstUsesWith(CI, Shuffle);
}
break;
}
case Intrinsic::experimental_vector_reverse: {
Value *BO0, *BO1, *X, *Y;
Value *Vec = II->getArgOperand(0);
if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
auto *OldBinOp = cast<BinaryOperator>(Vec);
if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(X)))) {
// rev(binop rev(X), rev(Y)) --> binop X, Y
if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(Y))))
return replaceInstUsesWith(CI,
BinaryOperator::CreateWithCopiedFlags(
OldBinOp->getOpcode(), X, Y, OldBinOp,
OldBinOp->getName(), II));
// rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
if (isSplatValue(BO1))
return replaceInstUsesWith(CI,
BinaryOperator::CreateWithCopiedFlags(
OldBinOp->getOpcode(), X, BO1,
OldBinOp, OldBinOp->getName(), II));
}
// rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(Y))) &&
isSplatValue(BO0))
return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
OldBinOp->getOpcode(), BO0, Y,
OldBinOp, OldBinOp->getName(), II));
}
// rev(unop rev(X)) --> unop X
if (match(Vec, m_OneUse(m_UnOp(
m_Intrinsic<Intrinsic::experimental_vector_reverse>(
m_Value(X)))))) {
auto *OldUnOp = cast<UnaryOperator>(Vec);
auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
return replaceInstUsesWith(CI, NewUnOp);
}
break;
}
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_and: {
// Canonicalize logical or/and reductions:
// Or reduction for i1 is represented as:
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
// %res = cmp ne iReduxWidth %val, 0
// And reduction for i1 is represented as:
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
// %res = cmp eq iReduxWidth %val, -1 (all ones)
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = Builder.CreateBitCast(
Vect, Builder.getIntNTy(FTy->getNumElements()));
if (IID == Intrinsic::vector_reduce_and) {
Res = Builder.CreateICmpEQ(
Res, ConstantInt::getAllOnesValue(Res->getType()));
} else {
assert(IID == Intrinsic::vector_reduce_or &&
"Expected or reduction.");
Res = Builder.CreateIsNotNull(Res);
}
if (Arg != Vect)
Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
II->getType());
return replaceInstUsesWith(CI, Res);
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_add: {
if (IID == Intrinsic::vector_reduce_add) {
// Convert vector_reduce_add(ZExt(<n x i1>)) to
// ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
// Convert vector_reduce_add(SExt(<n x i1>)) to
// -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
// Convert vector_reduce_add(<n x i1>) to
// Trunc(ctpop(bitcast <n x i1> to in)).
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *V = Builder.CreateBitCast(
Vect, Builder.getIntNTy(FTy->getNumElements()));
Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
if (Res->getType() != II->getType())
Res = Builder.CreateZExtOrTrunc(Res, II->getType());
if (Arg != Vect &&
cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
Res = Builder.CreateNeg(Res);
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_xor: {
if (IID == Intrinsic::vector_reduce_xor) {
// Exclusive disjunction reduction over the vector with
// (potentially-extended) i1 element type is actually a
// (potentially-extended) arithmetic `add` reduction over the original
// non-extended value:
// vector_reduce_xor(?ext(<n x i1>))
// -->
// ?ext(vector_reduce_add(<n x i1>))
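// (This holds because xor of i1 values is addition modulo 2, and i1
// addition already wraps modulo 2.)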
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = Builder.CreateAddReduce(Vect);
if (Arg != Vect)
Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_mul: {
if (IID == Intrinsic::vector_reduce_mul) {
// Multiplicative reduction over the vector with (potentially-extended)
// i1 element type is actually a (potentially zero-extended)
// logical `and` reduction over the original non-extended value:
// vector_reduce_mul(?ext(<n x i1>))
// -->
// zext(vector_reduce_and(<n x i1>))
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = Builder.CreateAndReduce(Vect);
if (Res->getType() != II->getType())
Res = Builder.CreateZExt(Res, II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_umax: {
if (IID == Intrinsic::vector_reduce_umin ||
IID == Intrinsic::vector_reduce_umax) {
// UMin/UMax reduction over the vector with (potentially-extended)
// i1 element type is actually a (potentially-extended)
// logical `and`/`or` reduction over the original non-extended value:
// vector_reduce_u{min,max}(?ext(<n x i1>))
// -->
// ?ext(vector_reduce_{and,or}(<n x i1>))
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Value *Res = IID == Intrinsic::vector_reduce_umin
? Builder.CreateAndReduce(Vect)
: Builder.CreateOrReduce(Vect);
if (Arg != Vect)
Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_smax: {
if (IID == Intrinsic::vector_reduce_smin ||
IID == Intrinsic::vector_reduce_smax) {
// SMin/SMax reduction over the vector with (potentially-extended)
// i1 element type is actually a (potentially-extended)
// logical `and`/`or` reduction over the original non-extended value:
// vector_reduce_s{min,max}(<n x i1>)
// -->
// vector_reduce_{or,and}(<n x i1>)
// and
// vector_reduce_s{min,max}(sext(<n x i1>))
// -->
// sext(vector_reduce_{or,and}(<n x i1>))
// and
// vector_reduce_s{min,max}(zext(<n x i1>))
// -->
// zext(vector_reduce_{and,or}(<n x i1>))
Value *Arg = II->getArgOperand(0);
Value *Vect;
if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
if (FTy->getElementType() == Builder.getInt1Ty()) {
Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
if (Arg != Vect)
ExtOpc = cast<CastInst>(Arg)->getOpcode();
Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
(ExtOpc == Instruction::CastOps::ZExt))
? Builder.CreateAndReduce(Vect)
: Builder.CreateOrReduce(Vect);
if (Arg != Vect)
Res = Builder.CreateCast(ExtOpc, Res, II->getType());
return replaceInstUsesWith(CI, Res);
}
}
}
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fadd:
case Intrinsic::vector_reduce_fmul: {
bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
IID != Intrinsic::vector_reduce_fmul) ||
II->hasAllowReassoc();
const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
IID == Intrinsic::vector_reduce_fmul)
? 1
: 0;
Value *Arg = II->getArgOperand(ArgIdx);
Value *V;
ArrayRef<int> Mask;
if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
!match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
!cast<ShuffleVectorInst>(Arg)->isSingleSource())
break;
int Sz = Mask.size();
SmallBitVector UsedIndices(Sz);
for (int Idx : Mask) {
if (Idx == UndefMaskElem || UsedIndices.test(Idx))
break;
UsedIndices.set(Idx);
}
// The shuffle can be removed iff it only reorders elements, with no
// repeats, undefs, or other changes.
if (UsedIndices.all()) {
replaceUse(II->getOperandUse(ArgIdx), V);
return nullptr;
}
break;
}
default: {
// Handle target specific intrinsics
Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
if (V)
return V.value();
break;
}
}
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
return Shuf;
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
}
// Fence instruction simplification
Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
// This check is solely here to handle arbitrary target-dependent syncscopes.
// TODO: Can remove if it does not matter in practice.
if (NFI && FI.isIdenticalTo(NFI))
return eraseInstFromFunction(FI);
// Returns true if FI1 is an identical or stronger fence than FI2.
auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
auto FI1SyncScope = FI1->getSyncScopeID();
// Consider same scope, where scope is global or single-thread.
if (FI1SyncScope != FI2->getSyncScopeID() ||
(FI1SyncScope != SyncScope::System &&
FI1SyncScope != SyncScope::SingleThread))
return false;
return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
};
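// e.g. (illustrative): a fence acquire immediately followed by a
// fence seq_cst in the same scope makes the first fence removable.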
if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
return eraseInstFromFunction(FI);
if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
if (isIdenticalOrStrongerFence(PFI, &FI))
return eraseInstFromFunction(FI);
return nullptr;
}
// InvokeInst simplification
Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
return visitCallBase(II);
}
// CallBrInst simplification
Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
return visitCallBase(CBI);
}
/// If this cast does not affect the value passed through the varargs area, we
/// can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallBase &Call,
const DataLayout &DL,
const CastInst *const CI,
const int ix) {
if (!CI->isLosslessCast())
return false;
// If this is a GC intrinsic, avoid munging types. We need types for
// statepoint reconstruction in SelectionDAG.
// TODO: This is probably something which should be expanded to all
// intrinsics since the entire point of intrinsics is that
// they are understandable by the optimizer.
if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
isa<GCResultInst>(Call))
return false;
// Opaque pointers are compatible with any byval types.
PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
if (SrcTy->isOpaque())
return true;
// The size of ByVal or InAlloca arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
if (!Call.isPassPointeeByValueArgument(ix))
return true;
// The transform currently only handles type replacement for byval, not other
// type-carrying attributes.
if (!Call.isByValArgument(ix))
return false;
Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
Type *DstElemTy = Call.getParamByValType(ix);
if (!SrcElemTy->isSized() || !DstElemTy->isSized())
return false;
if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
return false;
return true;
}
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
// Skip optimizing notail and musttail calls so
// LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
// LibCallSimplifier::optimizeCall should try to preserve tail calls though.
if (CI->isMustTailCall() || CI->isNoTailCall())
return nullptr;
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
++NumSimplified;
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
}
return nullptr;
}
static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
// Strip off at most one level of pointer casts, looking for an alloca. This
// is good enough in practice and simpler than handling any number of casts.
Value *Underlying = TrampMem->stripPointerCasts();
if (Underlying != TrampMem &&
(!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
return nullptr;
if (!isa<AllocaInst>(Underlying))
return nullptr;
IntrinsicInst *InitTrampoline = nullptr;
for (User *U : TrampMem->users()) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II)
return nullptr;
if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
if (InitTrampoline)
// More than one init_trampoline writes to this value. Give up.
return nullptr;
InitTrampoline = II;
continue;
}
if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
// Allow any number of calls to adjust.trampoline.
continue;
return nullptr;
}
// No call to init.trampoline found.
if (!InitTrampoline)
return nullptr;
// Check that the alloca is being used in the expected way.
if (InitTrampoline->getOperand(0) != TrampMem)
return nullptr;
return InitTrampoline;
}
static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
Value *TrampMem) {
// Visit all the previous instructions in the basic block, and try to find an
// init.trampoline which has a direct path to the adjust.trampoline.
for (BasicBlock::iterator I = AdjustTramp->getIterator(),
E = AdjustTramp->getParent()->begin();
I != E;) {
Instruction *Inst = &*--I;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
II->getOperand(0) == TrampMem)
return II;
if (Inst->mayWriteToMemory())
return nullptr;
}
return nullptr;
}
// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function. Otherwise return NULL.
static IntrinsicInst *findInitTrampoline(Value *Callee) {
Callee = Callee->stripPointerCasts();
IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
if (!AdjustTramp ||
AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
return nullptr;
Value *TrampMem = AdjustTramp->getOperand(0);
if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
return IT;
if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
return IT;
return nullptr;
}
bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
const TargetLibraryInfo *TLI) {
// Note: We only handle cases which can't be driven from generic attributes
// here. So, for example, nonnull and noalias (which are common properties
// of some allocation functions) are expected to be handled via annotation
// of the respective allocator declaration with generic attributes.
bool Changed = false;
if (!Call.getType()->isPointerTy())
return Changed;
Optional<APInt> Size = getAllocSize(&Call, TLI);
if (Size && *Size != 0) {
// TODO: We really should just emit deref_or_null here and then
// let the generic inference code combine that with nonnull.
if (Call.hasRetAttr(Attribute::NonNull)) {
Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
Call.addRetAttr(Attribute::getWithDereferenceableBytes(
Call.getContext(), Size->getLimitedValue()));
} else {
Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
Call.getContext(), Size->getLimitedValue()));
}
}
// Add alignment attribute if alignment is a power of two constant.
Value *Alignment = getAllocAlignment(&Call, TLI);
if (!Alignment)
return Changed;
ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
uint64_t AlignmentVal = AlignOpC->getZExtValue();
if (llvm::isPowerOf2_64(AlignmentVal)) {
Align ExistingAlign = Call.getRetAlign().valueOrOne();
Align NewAlign = Align(AlignmentVal);
if (NewAlign > ExistingAlign) {
Call.addRetAttr(
Attribute::getWithAlignment(Call.getContext(), NewAlign));
Changed = true;
}
}
}
return Changed;
}
/// Improvements for call, callbr and invoke instructions.
Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
bool Changed = annotateAnyAllocSite(Call, &TLI);
// Mark any parameters that are known to be non-null with the nonnull
// attribute. This is helpful for inlining calls to functions with null
// checks on their arguments.
SmallVector<unsigned, 4> ArgNos;
unsigned ArgNo = 0;
for (Value *V : Call.args()) {
if (V->getType()->isPointerTy() &&
!Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
ArgNos.push_back(ArgNo);
ArgNo++;
}
assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
if (!ArgNos.empty()) {
AttributeList AS = Call.getAttributes();
LLVMContext &Ctx = Call.getContext();
AS = AS.addParamAttribute(Ctx, ArgNos,
Attribute::get(Ctx, Attribute::NonNull));
Call.setAttributes(AS);
Changed = true;
}
// If the callee is a pointer to a function, attempt to move any casts to the
// arguments of the call/callbr/invoke.
Value *Callee = Call.getCalledOperand();
Function *CalleeF = dyn_cast<Function>(Callee);
if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
transformConstExprCastCall(Call))
return nullptr;
if (CalleeF) {
// Remove the convergent attr on calls when the callee is not convergent.
if (Call.isConvergent() && !CalleeF->isConvergent() &&
!CalleeF->isIntrinsic()) {
LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
<< "\n");
Call.setNotConvergent();
return &Call;
}
// If the call and callee calling conventions don't match, and neither one
// of the calling conventions is compatible with C calling convention
// this call must be unreachable, as the call is undefined.
if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
!(CalleeF->getCallingConv() == llvm::CallingConv::C &&
TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
!(Call.getCallingConv() == llvm::CallingConv::C &&
TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
// Only do this for calls to a function with a body. A prototype may
// not actually end up matching the implementation's calling conv for a
// variety of reasons (e.g. it may be written in assembly).
!CalleeF->isDeclaration()) {
Instruction *OldCall = &Call;
CreateNonTerminatorUnreachable(OldCall);
// If OldCall does not return void then replaceInstUsesWith poison.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!OldCall->getType()->isVoidTy())
replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall))
return eraseInstFromFunction(*OldCall);
// We cannot remove an invoke or a callbr, because it would change the
// CFG; just change the callee to a null pointer.
cast<CallBase>(OldCall)->setCalledFunction(
CalleeF->getFunctionType(),
Constant::getNullValue(CalleeF->getType()));
return nullptr;
}
}
// Calling a null function pointer is undefined if a null address isn't
// dereferenceable.
if ((isa<ConstantPointerNull>(Callee) &&
!NullPointerIsDefined(Call.getFunction())) ||
isa<UndefValue>(Callee)) {
// If Call does not return void then replaceInstUsesWith poison.
// This allows ValueHandlers and custom metadata to adjust themselves.
if (!Call.getType()->isVoidTy())
replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
if (Call.isTerminator()) {
// Can't remove an invoke or callbr because we cannot change the CFG.
return nullptr;
}
// This instruction is not reachable, just remove it.
CreateNonTerminatorUnreachable(&Call);
return eraseInstFromFunction(Call);
}
if (IntrinsicInst *II = findInitTrampoline(Callee))
return transformCallThroughTrampoline(Call, *II);
// TODO: Drop this transform once opaque pointer transition is done.
FunctionType *FTy = Call.getFunctionType();
if (FTy->isVarArg()) {
int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
I != E; ++I, ++ix) {
CastInst *CI = dyn_cast<CastInst>(*I);
if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
replaceUse(*I, CI->getOperand(0));
// Update the byval type to match the pointer type.
// Not necessary for opaque pointers.
PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
Call.removeParamAttr(ix, Attribute::ByVal);
Call.addParamAttr(ix, Attribute::getWithByValType(
Call.getContext(),
NewTy->getNonOpaquePointerElementType()));
}
Changed = true;
}
}
}
if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
InlineAsm *IA = cast<InlineAsm>(Callee);
if (!IA->canThrow()) {
// Normal inline asm calls cannot throw - mark them
// 'nounwind'.
Call.setDoesNotThrow();
Changed = true;
}
}
// Try to optimize the call if possible, we require DataLayout for most of
// this. None of these calls are seen as possibly dead so go ahead and
// delete the instruction now.
if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
Instruction *I = tryOptimizeCall(CI);
// If we changed something, return the result; otherwise let the
// fallthrough checks run.
if (I) return eraseInstFromFunction(*I);
}
if (!Call.use_empty() && !Call.isMustTailCall())
if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
Type *CallTy = Call.getType();
Type *RetArgTy = ReturnedArg->getType();
if (RetArgTy->canLosslesslyBitCastTo(CallTy))
return replaceInstUsesWith(
Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
}
if (isRemovableAlloc(&Call, &TLI))
return visitAllocSite(Call);
// Handle intrinsics which can be used in both call and invoke context.
switch (Call.getIntrinsicID()) {
case Intrinsic::experimental_gc_statepoint: {
GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
SmallPtrSet<Value *, 32> LiveGcValues;
for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
// Remove the relocation if unused.
if (GCR.use_empty()) {
eraseInstFromFunction(GCR);
continue;
}
Value *DerivedPtr = GCR.getDerivedPtr();
Value *BasePtr = GCR.getBasePtr();
// Undef is undef, even after relocation.
if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
eraseInstFromFunction(GCR);
continue;
}
if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
// The relocation of null will be null for most any collector.
// TODO: provide a hook for this in GCStrategy. There might be some
// weird collector this property does not hold for.
if (isa<ConstantPointerNull>(DerivedPtr)) {
// Use null-pointer of gc_relocate's type to replace it.
replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
eraseInstFromFunction(GCR);
continue;
}
// isKnownNonNull -> nonnull attribute
if (!GCR.hasRetAttr(Attribute::NonNull) &&
isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
GCR.addRetAttr(Attribute::NonNull);
// We discovered a new fact; re-check users.
Worklist.pushUsersToWorkList(GCR);
}
}
// If we have two copies of the same pointer in the statepoint argument
// list, canonicalize to one. This may let us common gc.relocates.
if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
auto *OpIntTy = GCR.getOperand(2)->getType();
GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
}
// TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
// Canonicalize on the type from the uses to the defs
// TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
LiveGcValues.insert(BasePtr);
LiveGcValues.insert(DerivedPtr);
}
Optional<OperandBundleUse> Bundle =
GCSP.getOperandBundle(LLVMContext::OB_gc_live);
unsigned NumOfGCLives = LiveGcValues.size();
if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
break;
// We can reduce the size of gc live bundle.
DenseMap<Value *, unsigned> Val2Idx;
std::vector<Value *> NewLiveGc;
for (unsigned I = 0, E = Bundle->Inputs.size(); I < E; ++I) {
Value *V = Bundle->Inputs[I];
if (Val2Idx.count(V))
continue;
if (LiveGcValues.count(V)) {
Val2Idx[V] = NewLiveGc.size();
NewLiveGc.push_back(V);
} else
Val2Idx[V] = NumOfGCLives;
}
// Update all gc.relocates
for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
Value *BasePtr = GCR.getBasePtr();
assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
"Missed live gc for base pointer");
auto *OpIntTy1 = GCR.getOperand(1)->getType();
GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
Value *DerivedPtr = GCR.getDerivedPtr();
assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
"Missed live gc for derived pointer");
auto *OpIntTy2 = GCR.getOperand(2)->getType();
GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
}
// Create new statepoint instruction.
OperandBundleDef NewBundle("gc-live", NewLiveGc);
return CallBase::Create(&Call, NewBundle);
}
default: { break; }
}
return Changed ? &Call : nullptr;
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/callbr/invoke.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
auto *Callee =
dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
if (!Callee)
return false;
// If this is a call to a thunk function, don't remove the cast. Thunks are
// used to transparently forward all incoming parameters and outgoing return
// values, so it's important to leave the cast in place.
if (Callee->hasFnAttribute("thunk"))
return false;
// If this is a musttail call, the callee's prototype must match the caller's
// prototype with the exception of pointee types. The code below doesn't
// implement that, so we can't do this transform.
// TODO: Do the transform if it only requires adding pointer casts.
if (Call.isMustTailCall())
return false;
Instruction *Caller = &Call;
const AttributeList &CallerPAL = Call.getAttributes();
// Okay, this is a cast from a function to a different type. Unless doing so
// would cause a type conversion of one of our arguments, change this call to
// be a direct call with arguments casted to the appropriate types.
FunctionType *FT = Callee->getFunctionType();
Type *OldRetTy = Caller->getType();
Type *NewRetTy = FT->getReturnType();
// Check to see if we are changing the return type...
if (OldRetTy != NewRetTy) {
if (NewRetTy->isStructTy())
return false; // TODO: Handle multiple return values.
if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
if (Callee->isDeclaration())
return false; // Cannot transform this return value.
if (!Caller->use_empty() &&
// void -> non-void is handled specially
!NewRetTy->isVoidTy())
return false; // Cannot transform this return value.
}
if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
return false; // Attribute not compatible with transformed value.
}
// If the callbase is an invoke/callbr instruction, and the return value is
// used by a PHI node in a successor, we cannot change the return type of
// the call because there is no place to put the cast instruction (without
// breaking the critical edge). Bail out in this case.
if (!Caller->use_empty()) {
BasicBlock *PhisNotSupportedBlock = nullptr;
if (auto *II = dyn_cast<InvokeInst>(Caller))
PhisNotSupportedBlock = II->getNormalDest();
if (auto *CB = dyn_cast<CallBrInst>(Caller))
PhisNotSupportedBlock = CB->getDefaultDest();
if (PhisNotSupportedBlock)
for (User *U : Caller->users())
if (PHINode *PN = dyn_cast<PHINode>(U))
if (PN->getParent() == PhisNotSupportedBlock)
return false;
}
}
unsigned NumActualArgs = Call.arg_size();
unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
// Prevent us turning:
// declare void @takes_i32_inalloca(i32* inalloca)
// call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
//
// into:
// call void @takes_i32_inalloca(i32* null)
//
// Similarly, avoid folding away bitcasts of byval calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
return false;
auto AI = Call.arg_begin();
for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Type *ActTy = (*AI)->getType();
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
// Check if there are any incompatible attributes we cannot drop safely.
if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
.overlaps(AttributeFuncs::typeIncompatible(
ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
return false; // Attribute not compatible with transformed value.
if (Call.isInAllocaArgument(i) ||
CallerPAL.hasParamAttr(i, Attribute::Preallocated))
return false; // Cannot transform to and from inalloca/preallocated.
if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
return false;
+ if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
+ Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
+ return false; // Cannot transform to or from byval.
+
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (!ParamPTy)
return false;
if (!ParamPTy->isOpaque()) {
Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
if (!ParamElTy->isSized())
return false;
Type *CurElTy = Call.getParamByValType(i);
if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
return false;
}
}
}
if (Callee->isDeclaration()) {
// Do not delete arguments unless we have a function body.
if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
return false;
// If the callee is just a declaration, don't change the varargsness of the
// call. We don't want to introduce a varargs call where one doesn't
// already exist.
if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
return false;
// If both the callee and the cast type are varargs, we still have to make
// sure the number of fixed parameters is the same, or we have the same
// ABI issues as if we introduce a varargs call.
if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
FT->getNumParams() != Call.getFunctionType()->getNumParams())
return false;
}
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
!CallerPAL.isEmpty()) {
// In this case we have more arguments than the new function type, but we
// won't be dropping them. Check that these extra arguments have attributes
// that are compatible with being a vararg call argument.
unsigned SRetIdx;
if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
return false;
}
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
SmallVector<Value *, 8> Args;
SmallVector<AttributeSet, 8> ArgAttrs;
Args.reserve(NumActualArgs);
ArgAttrs.reserve(NumActualArgs);
// Get any return attributes.
AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
// If the return value is not being used, the type may not be compatible
// with the existing attributes. Wipe out any problematic attributes.
RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
LLVMContext &Ctx = Call.getContext();
AI = Call.arg_begin();
for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
Type *ParamTy = FT->getParamType(i);
Value *NewArg = *AI;
if ((*AI)->getType() != ParamTy)
NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
Args.push_back(NewArg);
// Add any parameter attributes except the ones incompatible with the new
// type. Note that we made sure all incompatible ones are safe to drop.
AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
!ParamTy->isOpaquePointerTy()) {
AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
Ctx, IncompatibleAttrs));
AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
} else {
ArgAttrs.push_back(
CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
}
}
// If the function takes more arguments than the call was taking, add them
// now.
for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
Args.push_back(Constant::getNullValue(FT->getParamType(i)));
ArgAttrs.push_back(AttributeSet());
}
// If we are removing arguments to the function, emit an obnoxious warning.
if (FT->getNumParams() < NumActualArgs) {
// TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
if (FT->isVarArg()) {
// Add all of the arguments in their promoted form to the arg list.
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
Type *PTy = getPromotedType((*AI)->getType());
Value *NewArg = *AI;
if (PTy != (*AI)->getType()) {
// Must promote to pass through va_arg area!
Instruction::CastOps opcode =
CastInst::getCastOpcode(*AI, false, PTy, false);
NewArg = Builder.CreateCast(opcode, *AI, PTy);
}
Args.push_back(NewArg);
// Add any parameter attributes.
ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
}
}
}
AttributeSet FnAttrs = CallerPAL.getFnAttrs();
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
"missing argument attributes");
AttributeList NewCallerPAL = AttributeList::get(
Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
Call.getOperandBundlesAsDefs(OpBundles);
CallBase *NewCall;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
II->getUnwindDest(), Args, OpBundles);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
CBI->getIndirectDests(), Args, OpBundles);
} else {
NewCall = Builder.CreateCall(Callee, Args, OpBundles);
cast<CallInst>(NewCall)->setTailCallKind(
cast<CallInst>(Caller)->getTailCallKind());
}
NewCall->takeName(Caller);
NewCall->setCallingConv(Call.getCallingConv());
NewCall->setAttributes(NewCallerPAL);
// Preserve prof metadata if any.
NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
// Insert a cast of the return type as necessary.
Instruction *NC = NewCall;
Value *NV = NC;
if (OldRetTy != NV->getType() && !Caller->use_empty()) {
if (!NV->getType()->isVoidTy()) {
NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
NC->setDebugLoc(Caller->getDebugLoc());
// If this is an invoke/callbr instruction, we should insert it after the
// first non-phi instruction in the normal successor block.
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
BasicBlock::iterator I = CBI->getDefaultDest()->getFirstInsertionPt();
InsertNewInstBefore(NC, *I);
} else {
// Otherwise, it's a call, just insert cast right after the call.
InsertNewInstBefore(NC, *Caller);
}
Worklist.pushUsersToWorkList(*Caller);
} else {
NV = UndefValue::get(Caller->getType());
}
}
if (!Caller->use_empty())
replaceInstUsesWith(*Caller, NV);
else if (Caller->hasValueHandle()) {
if (OldRetTy == NV->getType())
ValueHandleBase::ValueIsRAUWd(Caller, NV);
else
// We cannot call ValueIsRAUWd with a different type, and the
// actual tracked value will disappear.
ValueHandleBase::ValueIsDeleted(Caller);
}
eraseInstFromFunction(*Caller);
return true;
}
/// Turn a call to a function created by init_trampoline / adjust_trampoline
/// intrinsic pair into a direct call to the underlying function.
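/// For example (illustrative): after
///   call void @llvm.init.trampoline(i8* %tramp, i8* bitcast (i32 (i8*, i32)* @f to i8*), i8* %nval)
/// a call made through the adjusted trampoline pointer becomes a direct call
/// to @f with %nval spliced in as the 'nest' argument.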
Instruction *
InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
IntrinsicInst &Tramp) {
Value *Callee = Call.getCalledOperand();
Type *CalleeTy = Callee->getType();
FunctionType *FTy = Call.getFunctionType();
AttributeList Attrs = Call.getAttributes();
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
return nullptr;
Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
FunctionType *NestFTy = NestF->getFunctionType();
AttributeList NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestArgNo = 0;
Type *NestTy = nullptr;
AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end();
I != E; ++NestArgNo, ++I) {
AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
if (AS.hasAttribute(Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
NestAttr = AS;
break;
}
}
if (NestTy) {
std::vector<Value*> NewArgs;
std::vector<AttributeSet> NewArgAttrs;
NewArgs.reserve(Call.arg_size() + 1);
NewArgAttrs.reserve(Call.arg_size());
// Insert the nest argument into the call argument list, which may
// mean appending it. Likewise for attributes.
{
unsigned ArgNo = 0;
auto I = Call.arg_begin(), E = Call.arg_end();
do {
if (ArgNo == NestArgNo) {
// Add the chain argument and attributes.
Value *NestVal = Tramp.getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
NewArgs.push_back(NestVal);
NewArgAttrs.push_back(NestAttr);
}
if (I == E)
break;
// Add the original argument and attributes.
NewArgs.push_back(*I);
NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
++ArgNo;
++I;
} while (true);
}
// The trampoline may have been bitcast to a bogus type (FTy).
// Handle this by synthesizing a new function type, equal to FTy
// with the chain parameter inserted.
std::vector<Type*> NewTypes;
NewTypes.reserve(FTy->getNumParams()+1);
// Insert the chain's type into the list of parameter types, which may
// mean appending it.
{
unsigned ArgNo = 0;
FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end();
do {
if (ArgNo == NestArgNo)
// Add the chain's type.
NewTypes.push_back(NestTy);
if (I == E)
break;
// Add the original type.
NewTypes.push_back(*I);
++ArgNo;
++I;
} while (true);
}
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
FTy->isVarArg());
Constant *NewCallee =
NestF->getType() == PointerType::getUnqual(NewFTy) ?
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
AttributeList NewPAL =
AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
Attrs.getRetAttrs(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
Call.getOperandBundlesAsDefs(OpBundles);
Instruction *NewCaller;
if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
NewCaller = InvokeInst::Create(NewFTy, NewCallee,
II->getNormalDest(), II->getUnwindDest(),
NewArgs, OpBundles);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
NewCaller =
CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
CBI->getIndirectDests(), NewArgs, OpBundles);
cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
} else {
NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
cast<CallInst>(NewCaller)->setTailCallKind(
cast<CallInst>(Call).getTailCallKind());
cast<CallInst>(NewCaller)->setCallingConv(
cast<CallInst>(Call).getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
NewCaller->setDebugLoc(Call.getDebugLoc());
return NewCaller;
}
}
// Replace the trampoline call with a direct call. Since there is no 'nest'
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
Call.setCalledFunction(FTy, NewCallee);
return &Call;
}
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 143a035749c7..644c5c82e58e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1,4815 +1,4820 @@
//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This transformation implements the well known scalar replacement of
/// aggregates transformation. It tries to identify promotable elements of an
/// aggregate alloca, and promote them to registers. It will also try to
/// convert uses of an element (or set of elements) of an alloca into a vector
/// or bitfield-style integer scalar if appropriate.
///
/// It works to do this with minimal slicing of the alloca so that regions
/// which are merely transferred in and out of external memory remain unchanged
/// and are not decomposed to scalar code.
///
/// Because this also performs alloca promotion, it can be thought of as also
/// serving the purpose of SSA formation. The algorithm iterates on the
/// function until all opportunities for promotion have been realized.
///
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace llvm;
using namespace llvm::sroa;
#define DEBUG_TYPE "sroa"
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// Hidden option to experiment with completely strict handling of inbounds
/// GEPs.
static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
cl::Hidden);
namespace {
/// A custom IRBuilder inserter which prefixes all names, but only in
/// Assert builds.
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
std::string Prefix;
Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
public:
void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
BasicBlock::iterator InsertPt) const override {
IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name), BB,
InsertPt);
}
};
/// Provide a type for IRBuilder that drops names in release builds.
using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
/// A used slice of an alloca.
///
/// This structure represents a slice of an alloca used by some instruction. It
/// stores both the begin and end offsets of this use, a pointer to the use
/// itself, and a flag indicating whether we can classify the use as splittable
/// or not when forming partitions of the alloca.
class Slice {
/// The beginning offset of the range.
uint64_t BeginOffset = 0;
/// The ending offset, not included in the range.
uint64_t EndOffset = 0;
/// Storage for both the use of this slice and whether it can be
/// split.
PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
public:
Slice() = default;
Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
: BeginOffset(BeginOffset), EndOffset(EndOffset),
UseAndIsSplittable(U, IsSplittable) {}
uint64_t beginOffset() const { return BeginOffset; }
uint64_t endOffset() const { return EndOffset; }
bool isSplittable() const { return UseAndIsSplittable.getInt(); }
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
bool isDead() const { return getUse() == nullptr; }
void kill() { UseAndIsSplittable.setPointer(nullptr); }
/// Support for ordering ranges.
///
/// This provides an ordering over ranges such that start offsets are
/// always increasing, and within equal start offsets, the end offsets are
/// decreasing. Thus the spanning range comes first in a cluster with the
/// same start position.
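/// For example (illustrative), slices beginning at the same offset sort
/// with unsplittable slices first and then by decreasing end offset:
///   [0, 8) unsplittable < [0, 16) splittable < [0, 8) splittable < [4, 8)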
bool operator<(const Slice &RHS) const {
if (beginOffset() < RHS.beginOffset())
return true;
if (beginOffset() > RHS.beginOffset())
return false;
if (isSplittable() != RHS.isSplittable())
return !isSplittable();
if (endOffset() > RHS.endOffset())
return true;
return false;
}
/// Support comparison with a single offset to allow binary searches.
friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
uint64_t RHSOffset) {
return LHS.beginOffset() < RHSOffset;
}
friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
const Slice &RHS) {
return LHSOffset < RHS.beginOffset();
}
bool operator==(const Slice &RHS) const {
return isSplittable() == RHS.isSplittable() &&
beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
}
bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
};
} // end anonymous namespace
/// Representation of the alloca slices.
///
/// This class represents the slices of an alloca which are formed by its
/// various uses. If a pointer escapes, we can't fully build a representation
/// for the slices used and we reflect that in this structure. The uses are
/// stored, sorted by increasing beginning offset and with unsplittable slices
/// starting at a particular offset before splittable slices.
class llvm::sroa::AllocaSlices {
public:
/// Construct the slices of a particular alloca.
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
/// Test whether a pointer to the allocation escapes our analysis.
///
/// If this is true, the slices are never fully built and should be
/// ignored.
bool isEscaped() const { return PointerEscapingInstr; }
/// Support for iterating over the slices.
/// @{
using iterator = SmallVectorImpl<Slice>::iterator;
using range = iterator_range<iterator>;
iterator begin() { return Slices.begin(); }
iterator end() { return Slices.end(); }
using const_iterator = SmallVectorImpl<Slice>::const_iterator;
using const_range = iterator_range<const_iterator>;
const_iterator begin() const { return Slices.begin(); }
const_iterator end() const { return Slices.end(); }
/// @}
/// Erase a range of slices.
void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
/// Insert new slices for this alloca.
///
/// This moves the slices into the alloca's slices collection, and re-sorts
/// everything so that the usual ordering properties of the alloca's slices
/// hold.
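/// Sorting only the appended slices and then merging keeps this cheaper
/// than re-sorting the whole vector when few slices are inserted.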
void insert(ArrayRef<Slice> NewSlices) {
int OldSize = Slices.size();
Slices.append(NewSlices.begin(), NewSlices.end());
auto SliceI = Slices.begin() + OldSize;
llvm::sort(SliceI, Slices.end());
std::inplace_merge(Slices.begin(), SliceI, Slices.end());
}
// Forward declare the iterator and range accessor for walking the
// partitions.
class partition_iterator;
iterator_range<partition_iterator> partitions();
/// Access the dead users for this alloca.
ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
/// Access Uses that should be dropped if the alloca is promotable.
ArrayRef<Use *> getDeadUsesIfPromotable() const {
return DeadUseIfPromotable;
}
/// Access the dead operands referring to this alloca.
///
/// These are operands which cannot actually be used to refer to the
/// alloca as they are outside its range and the user doesn't correct for
/// that. These mostly consist of PHI node inputs and the like which we just
/// need to replace with undef.
ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
void printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void printUse(raw_ostream &OS, const_iterator I,
StringRef Indent = " ") const;
void print(raw_ostream &OS) const;
void dump(const_iterator I) const;
void dump() const;
#endif
private:
template <typename DerivedT, typename RetT = void> class BuilderBase;
class SliceBuilder;
friend class AllocaSlices::SliceBuilder;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Handle to alloca instruction to simplify method interfaces.
AllocaInst &AI;
#endif
/// The instruction responsible for this alloca not having a known set
/// of slices.
///
/// When an instruction (potentially) escapes the pointer to the alloca, we
/// store a pointer to that here and abort trying to form slices of the
/// alloca. This will be null if the alloca slices are analyzed successfully.
Instruction *PointerEscapingInstr;
/// The slices of the alloca.
///
/// We store a vector of the slices formed by uses of the alloca here. This
/// vector is sorted by increasing begin offset, and then the unsplittable
/// slices before the splittable ones. See the Slice inner class for more
/// details.
SmallVector<Slice, 8> Slices;
/// Instructions which will become dead if we rewrite the alloca.
///
/// Note that these are not separated by slice. This is because we expect an
/// alloca to be completely rewritten or not rewritten at all. If rewritten,
/// all these instructions can simply be removed and replaced with poison as
/// they come from outside of the allocated space.
SmallVector<Instruction *, 8> DeadUsers;
/// Uses which will become dead if we can promote the alloca.
SmallVector<Use *, 8> DeadUseIfPromotable;
/// Operands which will become dead if we rewrite the alloca.
///
/// These are operands that in their particular use can be replaced with
/// poison when we rewrite the alloca. These show up in out-of-bounds inputs
/// to PHI nodes and the like. They aren't entirely dead (there might be
/// a GEP back into the bounds using it elsewhere), and neither is the PHI, but we
/// want to swap this particular input for poison to simplify the use lists of
/// the alloca.
SmallVector<Use *, 8> DeadOperands;
};
/// A partition of the slices.
///
/// An ephemeral representation for a range of slices which can be viewed as
/// a partition of the alloca. This range represents a span of the alloca's
/// memory which cannot be split, and provides access to all of the slices
/// overlapping some part of the partition.
///
/// Objects of this type are produced by traversing the alloca's slices, but
/// are only ephemeral and not persistent.
class llvm::sroa::Partition {
private:
friend class AllocaSlices;
friend class AllocaSlices::partition_iterator;
using iterator = AllocaSlices::iterator;
/// The beginning and ending offsets of the alloca for this
/// partition.
uint64_t BeginOffset = 0, EndOffset = 0;
/// The start and end iterators of this partition.
iterator SI, SJ;
/// A collection of split slice tails overlapping the partition.
SmallVector<Slice *, 4> SplitTails;
/// Raw constructor builds an empty partition starting and ending at
/// the given iterator.
Partition(iterator SI) : SI(SI), SJ(SI) {}
public:
/// The start offset of this partition.
///
/// All of the contained slices start at or after this offset.
uint64_t beginOffset() const { return BeginOffset; }
/// The end offset of this partition.
///
/// All of the contained slices end at or before this offset.
uint64_t endOffset() const { return EndOffset; }
/// The size of the partition.
///
/// Note that this can never be zero.
uint64_t size() const {
assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
return EndOffset - BeginOffset;
}
/// Test whether this partition contains no slices, and merely spans
/// a region occupied by split slices.
bool empty() const { return SI == SJ; }
/// \name Iterate slices that start within the partition.
/// These may be splittable or unsplittable. They have a begin offset >= the
/// partition begin offset.
/// @{
// FIXME: We should probably define a "concat_iterator" helper and use that
// to stitch together pointee_iterators over the split tails and the
// contiguous iterators of the partition. That would give a much nicer
// interface here. We could then additionally expose filtered iterators for
// split, unsplit, and unsplittable slices based on the usage patterns.
iterator begin() const { return SI; }
iterator end() const { return SJ; }
/// @}
/// Get the sequence of split slice tails.
///
/// These tails are of slices which start before this partition but are
/// split and overlap into the partition. We accumulate these while forming
/// partitions.
ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
};
/// An iterator over partitions of the alloca's slices.
///
/// This iterator implements the core algorithm for partitioning the alloca's
/// slices. It is a forward iterator as we don't support backtracking for
/// efficiency reasons, and re-use a single storage area to maintain the
/// current set of split slices.
///
/// It is templated on the slice iterator type to use so that it can operate
/// with either const or non-const slice iterators.
class AllocaSlices::partition_iterator
: public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
Partition> {
friend class AllocaSlices;
/// Most of the state for walking the partitions is held in a class
/// with a nice interface for examining them.
Partition P;
/// We need to keep the end of the slices to know when to stop.
AllocaSlices::iterator SE;
/// We also need to keep track of the maximum split end offset seen.
/// FIXME: Do we really?
uint64_t MaxSplitSliceEndOffset = 0;
/// Sets the partition to be empty at the given iterator, and sets the
/// end iterator.
partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
: P(SI), SE(SE) {
// If not already at the end, advance our state to form the initial
// partition.
if (SI != SE)
advance();
}
/// Advance the iterator to the next partition.
///
/// Requires that the iterator not be at the end of the slices.
void advance() {
assert((P.SI != SE || !P.SplitTails.empty()) &&
"Cannot advance past the end of the slices!");
// Clear out any split uses which have ended.
if (!P.SplitTails.empty()) {
if (P.EndOffset >= MaxSplitSliceEndOffset) {
// If we've finished all splits, this is easy.
P.SplitTails.clear();
MaxSplitSliceEndOffset = 0;
} else {
// Remove the uses which have ended in the prior partition. This
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
llvm::erase_if(P.SplitTails,
[&](Slice *S) { return S->endOffset() <= P.EndOffset; });
assert(llvm::any_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() == MaxSplitSliceEndOffset;
}) &&
"Could not find the current max split slice offset!");
assert(llvm::all_of(P.SplitTails,
[&](Slice *S) {
return S->endOffset() <= MaxSplitSliceEndOffset;
}) &&
"Max split slice end offset is not actually the max!");
}
}
// If P.SI is already at the end, then we've cleared the split tail and
// now have an end iterator.
if (P.SI == SE) {
assert(P.SplitTails.empty() && "Failed to clear the split slices!");
return;
}
// If we had a non-empty partition previously, set up the state for
// subsequent partitions.
if (P.SI != P.SJ) {
// Accumulate all the splittable slices which started in the old
// partition into the split list.
for (Slice &S : P)
if (S.isSplittable() && S.endOffset() > P.EndOffset) {
P.SplitTails.push_back(&S);
MaxSplitSliceEndOffset =
std::max(S.endOffset(), MaxSplitSliceEndOffset);
}
// Start from the end of the previous partition.
P.SI = P.SJ;
// If P.SI is now at the end, we at most have a tail of split slices.
if (P.SI == SE) {
P.BeginOffset = P.EndOffset;
P.EndOffset = MaxSplitSliceEndOffset;
return;
}
// If we have split slices and the next slice is after a gap and is
// not splittable immediately form an empty partition for the split
// slices up until the next slice begins.
if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
!P.SI->isSplittable()) {
P.BeginOffset = P.EndOffset;
P.EndOffset = P.SI->beginOffset();
return;
}
}
// OK, we need to consume new slices. Set the end offset based on the
// current slice, and step SJ past it. The beginning offset of the
// partition is the beginning offset of the next slice unless we have
// pre-existing split slices that are continuing, in which case we begin
// at the prior end offset.
P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
P.EndOffset = P.SI->endOffset();
++P.SJ;
// There are two strategies to form a partition based on whether the
// partition starts with an unsplittable slice or a splittable slice.
if (!P.SI->isSplittable()) {
// When we're forming an unsplittable region, it must always start at
// the first slice and will extend through its end.
assert(P.BeginOffset == P.SI->beginOffset());
// Form a partition including all of the overlapping slices with this
// unsplittable slice.
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
if (!P.SJ->isSplittable())
P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
++P.SJ;
}
// We have a partition across a set of overlapping unsplittable
// slices.
return;
}
// If we're starting with a splittable slice, then we need to form
// a synthetic partition spanning it and any other overlapping splittable
// slices.
assert(P.SI->isSplittable() && "Forming a splittable partition!");
// Collect all of the overlapping splittable slices.
while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
P.SJ->isSplittable()) {
P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
++P.SJ;
}
// Back up P.EndOffset if we ended the span early when encountering an
// unsplittable slice. This synthesizes the early end offset of
// a partition spanning only splittable slices.
if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
assert(!P.SJ->isSplittable());
P.EndOffset = P.SJ->beginOffset();
}
}
public:
bool operator==(const partition_iterator &RHS) const {
assert(SE == RHS.SE &&
"End iterators don't match between compared partition iterators!");
// The observed positions of partitions are marked by the P.SI iterator and
// the emptiness of the split slices. The latter is only relevant when
// P.SI == SE, as the end iterator will additionally have an empty split
// slices list, but the prior may have the same P.SI and a tail of split
// slices.
if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
assert(P.SJ == RHS.P.SJ &&
"Same set of slices formed two different sized partitions!");
assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
"Same slice position with differently sized non-empty split "
"slice tails!");
return true;
}
return false;
}
partition_iterator &operator++() {
advance();
return *this;
}
Partition &operator*() { return P; }
};
/// A forward range over the partitions of the alloca's slices.
///
/// This accesses an iterator range over the partitions of the alloca's
/// slices. It computes these partitions on the fly based on the overlapping
/// offsets of the slices and the ability to split them. It will visit "empty"
/// partitions to cover regions of the alloca only accessed via split
/// slices.
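/// For example (illustrative): slices [0, 8) unsplittable and [0, 16)
/// splittable form two partitions: [0, 8) containing both slices, and an
/// "empty" [8, 16) covered only by the split tail of the second slice.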
iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
return make_range(partition_iterator(begin(), end()),
partition_iterator(end(), end()));
}
static Value *foldSelectInst(SelectInst &SI) {
// If the condition being selected on is a constant or the same value is
// being selected between, fold the select. Yes this does (rarely) happen
// early on.
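// E.g. (illustrative): 'select i1 true, i8* %a, i8* %b' folds to %a, and
// 'select i1 %c, i8* %a, i8* %a' folds to %a.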
if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
return SI.getOperand(1 + CI->isZero());
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
return nullptr;
}
/// A helper that folds a PHI node or a select.
static Value *foldPHINodeOrSelectInst(Instruction &I) {
if (PHINode *PN = dyn_cast<PHINode>(&I)) {
// If PN merges together the same value, return that value.
return PN->hasConstantValue();
}
return foldSelectInst(cast<SelectInst>(I));
}
/// Builder for the alloca slices.
///
/// This class builds a set of alloca slices by recursively visiting the uses
/// of an alloca and making a slice for each load and store at each offset.
class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
friend class PtrUseVisitor<SliceBuilder>;
friend class InstVisitor<SliceBuilder>;
using Base = PtrUseVisitor<SliceBuilder>;
const uint64_t AllocSize;
AllocaSlices &AS;
SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
/// Set to de-duplicate dead instructions found in the use walk.
SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
public:
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
: PtrUseVisitor<SliceBuilder>(DL),
AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()),
AS(AS) {}
private:
void markAsDead(Instruction &I) {
if (VisitedDeadInsts.insert(&I).second)
AS.DeadUsers.push_back(&I);
}
void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
bool IsSplittable = false) {
// Completely skip uses which have a zero size or start either before or
// past the end of the allocation.
if (Size == 0 || Offset.uge(AllocSize)) {
LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
<< Offset
<< " which has zero size or starts outside of the "
<< AllocSize << " byte alloca:\n"
<< " alloca: " << AS.AI << "\n"
<< " use: " << I << "\n");
return markAsDead(I);
}
uint64_t BeginOffset = Offset.getZExtValue();
uint64_t EndOffset = BeginOffset + Size;
// Clamp the end offset to the end of the allocation. Note that this is
// formulated to handle even the case where "BeginOffset + Size" overflows.
// This may appear superficially to be something we could ignore entirely,
// but that is not so! There may be widened loads or PHI-node uses where
// some instructions are dead but not others. We can't completely ignore
// them, and so have to record at least the information here.
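// E.g. (illustrative): a 16 byte use at offset 8 of a 16 byte alloca is
// recorded as the clamped slice [8, 16) rather than being dropped.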
assert(AllocSize >= BeginOffset); // Established above.
if (Size > AllocSize - BeginOffset) {
LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
<< Offset << " to remain within the " << AllocSize
<< " byte alloca:\n"
<< " alloca: " << AS.AI << "\n"
<< " use: " << I << "\n");
EndOffset = AllocSize;
}
AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
}
void visitBitCastInst(BitCastInst &BC) {
if (BC.use_empty())
return markAsDead(BC);
return Base::visitBitCastInst(BC);
}
void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
if (ASC.use_empty())
return markAsDead(ASC);
return Base::visitAddrSpaceCastInst(ASC);
}
void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (GEPI.use_empty())
return markAsDead(GEPI);
if (SROAStrictInbounds && GEPI.isInBounds()) {
// FIXME: This is a manually un-factored variant of the basic code inside
// of GEPs with checking of the inbounds invariant specified in the
// langref in a very strict sense. If we ever want to enable
// SROAStrictInbounds, this code should be factored cleanly into
// PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
// by writing out the code here where we have the underlying allocation
// size readily available.
APInt GEPOffset = Offset;
const DataLayout &DL = GEPI.getModule()->getDataLayout();
for (gep_type_iterator GTI = gep_type_begin(GEPI),
GTE = gep_type_end(GEPI);
GTI != GTE; ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
if (!OpC)
break;
// Handle a struct index, which adds its field offset to the pointer.
if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
GEPOffset +=
APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
} else {
// For array or vector indices, scale the index by the size of the
// type.
APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
GEPOffset +=
Index *
APInt(Offset.getBitWidth(),
DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize());
}
// If this index has computed an intermediate pointer which is not
// inbounds, then the result of the GEP is a poison value and we can
// delete it and all uses.
if (GEPOffset.ugt(AllocSize))
return markAsDead(GEPI);
}
}
return Base::visitGetElementPtrInst(GEPI);
}
void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
uint64_t Size, bool IsVolatile) {
// We allow splitting of non-volatile loads and stores where the type is an
// integer type. These may be used to implement 'memcpy' or other "transfer
// of bits" patterns.
bool IsSplittable =
Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
insertUse(I, Offset, Size, IsSplittable);
}
void visitLoadInst(LoadInst &LI) {
assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
"All simple FCA loads should have been pre-split");
if (!IsOffsetKnown)
return PI.setAborted(&LI);
if (LI.isVolatile() &&
LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&LI);
if (isa<ScalableVectorType>(LI.getType()))
return PI.setAborted(&LI);
uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
Value *ValOp = SI.getValueOperand();
if (ValOp == *U)
return PI.setEscapedAndAborted(&SI);
if (!IsOffsetKnown)
return PI.setAborted(&SI);
if (SI.isVolatile() &&
SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&SI);
if (isa<ScalableVectorType>(ValOp->getType()))
return PI.setAborted(&SI);
uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
// If this memory access can be shown to *statically* extend outside the
// bounds of the allocation, its behavior is undefined, so simply
// ignore it. Note that this is more strict than the generic clamping
// behavior of insertUse. We also try to handle cases which might run the
// risk of overflow.
// FIXME: We should instead consider the pointer to have escaped if this
// function is being instrumented for addressing bugs or race conditions.
if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
<< Offset << " which extends past the end of the "
<< AllocSize << " byte alloca:\n"
<< " alloca: " << AS.AI << "\n"
<< " use: " << SI << "\n");
return markAsDead(SI);
}
assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
"All simple FCA stores should have been pre-split");
handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
}
void visitMemSetInst(MemSetInst &II) {
assert(II.getRawDest() == *U && "Pointer use is not the destination?");
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if ((Length && Length->getValue() == 0) ||
(IsOffsetKnown && Offset.uge(AllocSize)))
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
if (!IsOffsetKnown)
return PI.setAborted(&II);
// Don't replace this with a store with a different address space. TODO:
// Use a store with the casted new alloca?
if (II.isVolatile() && II.getDestAddressSpace() != DL.getAllocaAddrSpace())
return PI.setAborted(&II);
insertUse(II, Offset, Length ? Length->getLimitedValue()
: AllocSize - Offset.getLimitedValue(),
(bool)Length);
}
void visitMemTransferInst(MemTransferInst &II) {
ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
if (Length && Length->getValue() == 0)
// Zero-length mem transfer intrinsics can be ignored entirely.
return markAsDead(II);
// Because we can visit these intrinsics twice, also check to see if the
// first time marked this instruction as dead. If so, skip it.
if (VisitedDeadInsts.count(&II))
return;
if (!IsOffsetKnown)
return PI.setAborted(&II);
// Don't replace this with a load/store with a different address space.
// TODO: Use a store with the casted new alloca?
if (II.isVolatile() &&
(II.getDestAddressSpace() != DL.getAllocaAddrSpace() ||
II.getSourceAddressSpace() != DL.getAllocaAddrSpace()))
return PI.setAborted(&II);
// This side of the transfer is completely out-of-bounds, and so we can
// nuke the entire transfer. However, we also need to nuke the other side
// if already added to our partitions.
// FIXME: Yet another place we really should bypass this when
// instrumenting for ASan.
if (Offset.uge(AllocSize)) {
SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
MemTransferSliceMap.find(&II);
if (MTPI != MemTransferSliceMap.end())
AS.Slices[MTPI->second].kill();
return markAsDead(II);
}
uint64_t RawOffset = Offset.getLimitedValue();
uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
// Check for the special case where the same exact value is used for both
// source and dest.
if (*U == II.getRawDest() && *U == II.getRawSource()) {
// For non-volatile transfers this is a no-op.
if (!II.isVolatile())
return markAsDead(II);
return insertUse(II, Offset, Size, /*IsSplittable=*/false);
}
// If we have seen both source and destination for a mem transfer, then
// they both point to the same alloca.
bool Inserted;
SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
std::tie(MTPI, Inserted) =
MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
unsigned PrevIdx = MTPI->second;
if (!Inserted) {
Slice &PrevP = AS.Slices[PrevIdx];
// Check if the begin offsets match and this is a non-volatile transfer.
// In that case, we can completely elide the transfer.
if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
PrevP.kill();
return markAsDead(II);
}
// Otherwise we have an offset transfer within the same alloca. We can't
// split those.
PrevP.makeUnsplittable();
}
// Insert the use now that we've fixed up the splittable nature.
insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
// Check that we ended up with a valid index in the map.
assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
"Map index doesn't point back to a slice with this user.");
}
// Disable SRoA for any intrinsics except for lifetime invariants and
// invariant group.
// FIXME: What about debug intrinsics? This matches old behavior, but
// doesn't make sense.
void visitIntrinsicInst(IntrinsicInst &II) {
if (II.isDroppable()) {
AS.DeadUseIfPromotable.push_back(U);
return;
}
if (!IsOffsetKnown)
return PI.setAborted(&II);
if (II.isLifetimeStartOrEnd()) {
ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
Length->getLimitedValue());
insertUse(II, Offset, Size, true);
return;
}
if (II.isLaunderOrStripInvariantGroup()) {
enqueueUsers(II);
return;
}
Base::visitIntrinsicInst(II);
}
Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
// We consider any PHI or select that results in a direct load or store of
// the same offset to be a viable use for slicing purposes. These uses
// are considered unsplittable and the size is the maximum loaded or stored
// size.
SmallPtrSet<Instruction *, 4> Visited;
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
const DataLayout &DL = Root->getModule()->getDataLayout();
// If there are no loads or stores, the access is dead. We mark that as
// a size zero access.
Size = 0;
do {
Instruction *I, *UsedI;
std::tie(UsedI, I) = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Size = std::max(Size,
DL.getTypeStoreSize(LI->getType()).getFixedSize());
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Op = SI->getOperand(0);
if (Op == UsedI)
return SI;
Size = std::max(Size,
DL.getTypeStoreSize(Op->getType()).getFixedSize());
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
if (!GEP->hasAllZeroIndices())
return GEP;
} else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
!isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
return I;
}
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
return nullptr;
}
void visitPHINodeOrSelectInst(Instruction &I) {
assert(isa<PHINode>(I) || isa<SelectInst>(I));
if (I.use_empty())
return markAsDead(I);
// If this is a PHI node before a catchswitch, we cannot insert any non-PHI
// instructions in this BB, which may be required during rewriting. Bail out
// on these cases.
if (isa<PHINode>(I) &&
I.getParent()->getFirstInsertionPt() == I.getParent()->end())
return PI.setAborted(&I);
// TODO: We could use simplifyInstruction here to fold PHINodes and
// SelectInsts. However, doing so requires to change the current
// dead-operand-tracking mechanism. For instance, suppose neither loading
// from %U nor %other traps. Then "load (select undef, %U, %other)" does not
// trap either. However, if we simply replace %U with undef using the
// current dead-operand-tracking mechanism, "load (select undef, undef,
// %other)" may trap because the select may return the first operand
// "undef".
if (Value *Result = foldPHINodeOrSelectInst(I)) {
if (Result == *U)
// If the result of the constant fold will be the pointer, recurse
// through the PHI/select as if we had RAUW'ed it.
enqueueUsers(I);
else
// Otherwise the operand to the PHI/select is dead, and we can replace
// it with poison.
AS.DeadOperands.push_back(U);
return;
}
if (!IsOffsetKnown)
return PI.setAborted(&I);
// See if we already have computed info on this node.
uint64_t &Size = PHIOrSelectSizes[&I];
if (!Size) {
// This is a new PHI/Select, check for an unsafe use of it.
if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
return PI.setAborted(UnsafeI);
}
// For PHI and select operands outside the alloca, we can't nuke the entire
// phi or select -- the other side might still be relevant, so we special
// case them here and use a separate structure to track the operands
// themselves which should be replaced with poison.
// FIXME: This should instead be escaped in the event we're instrumenting
// for address sanitization.
if (Offset.uge(AllocSize)) {
AS.DeadOperands.push_back(U);
return;
}
insertUse(I, Offset, Size);
}
void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
/// Disable SROA entirely if there are unhandled users of the alloca.
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
};
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
// FIXME: We should sink the escape vs. abort info into the caller nicely,
// possibly by just storing the PtrInfo in the AllocaSlices.
PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
: PtrI.getAbortingInst();
assert(PointerEscapingInstr && "Did not track a bad instruction");
return;
}
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
// Sort the uses. This arranges for the offsets to be in ascending order,
// and the sizes to be in descending order.
llvm::stable_sort(Slices);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void AllocaSlices::print(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
printSlice(OS, I, Indent);
OS << "\n";
printUse(OS, I, Indent);
}
void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
<< " slice #" << (I - begin())
<< (I->isSplittable() ? " (splittable)" : "");
}
void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
StringRef Indent) const {
OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}
void AllocaSlices::print(raw_ostream &OS) const {
if (PointerEscapingInstr) {
OS << "Can't analyze slices for alloca: " << AI << "\n"
<< " A pointer to this alloca escaped by:\n"
<< " " << *PointerEscapingInstr << "\n";
return;
}
OS << "Slices of alloca: " << AI << "\n";
for (const_iterator I = begin(), E = end(); I != E; ++I)
print(OS, I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
print(dbgs(), I);
}
LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
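/// For example (illustrative): if every slice spanning the whole partition
/// loads or stores i64, the common type is i64; if the uses mix float and
/// i32, no common type is found and i32 is returned as the integer
/// fallback.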
static std::pair<Type *, IntegerType *>
findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
uint64_t EndOffset) {
Type *Ty = nullptr;
bool TyIsCommon = true;
IntegerType *ITy = nullptr;
// Note that we need to look at *every* alloca slice's Use to ensure we
// always get consistent results regardless of the order of slices.
for (AllocaSlices::const_iterator I = B; I != E; ++I) {
Use *U = I->getUse();
if (isa<IntrinsicInst>(*U->getUser()))
continue;
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
UserTy = SI->getValueOperand()->getType();
}
if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
// If the type is larger than the partition, skip it. We only encounter
// this for split integer operations where we want to use the type of the
// entity causing the split. Also skip if the type is not a byte width
// multiple.
if (UserITy->getBitWidth() % 8 != 0 ||
UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
continue;
// Track the largest bitwidth integer type used in this way in case there
// is no common type.
if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
ITy = UserITy;
}
// To avoid depending on the order of slices, Ty and TyIsCommon must not
// depend on types skipped above.
if (!UserTy || (Ty && Ty != UserTy))
TyIsCommon = false; // Give up on anything but an iN type.
else
Ty = UserTy;
}
return {TyIsCommon ? Ty : nullptr, ITy};
}
/// PHI instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers in the pred blocks and then PHI the
/// results, allowing the load of the alloca to be promoted.
/// From this:
/// %P2 = phi [i32* %Alloca, i32* %Other]
/// %V = load i32* %P2
/// to:
/// %V1 = load i32* %Alloca -> will be mem2reg'd
/// ...
/// %V2 = load i32* %Other
/// ...
/// %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a select if its only uses are loads and if the operands
/// to the select can be loaded unconditionally.
///
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN) {
const DataLayout &DL = PN.getModule()->getDataLayout();
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
// TODO: Allow stores.
BasicBlock *BB = PN.getParent();
Align MaxAlign;
uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
- APInt MaxSize(APWidth, 0);
- bool HaveLoad = false;
+ Type *LoadType = nullptr;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
// For now we only allow loads in the same block as the PHI. This is
// a common case that happens when instcombine merges two loads through
// a PHI.
if (LI->getParent() != BB)
return false;
+ if (LoadType) {
+ if (LoadType != LI->getType())
+ return false;
+ } else {
+ LoadType = LI->getType();
+ }
+
// Ensure that there are no instructions between the PHI and the load that
// could store.
for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
if (BBI->mayWriteToMemory())
return false;
- uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize();
MaxAlign = std::max(MaxAlign, LI->getAlign());
- MaxSize = MaxSize.ult(Size) ? APInt(APWidth, Size) : MaxSize;
- HaveLoad = true;
}
- if (!HaveLoad)
+ if (!LoadType)
return false;
+ APInt LoadSize = APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedSize());
+
// We can only transform this if it is safe to push the loads into the
// predecessor blocks. The only thing to watch out for is that we can't put
// a possibly trapping load in the predecessor if it is a critical edge.
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
Value *InVal = PN.getIncomingValue(Idx);
// If the value is produced by the terminator of the predecessor (an
// invoke) or it has side-effects, there is no valid place to put a load
// in the predecessor.
if (TI == InVal || TI->mayHaveSideEffects())
return false;
// If the predecessor has a single successor, then the edge isn't
// critical.
if (TI->getNumSuccessors() == 1)
continue;
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (isSafeToLoadUnconditionally(InVal, MaxAlign, MaxSize, DL, TI))
+ if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
continue;
return false;
}
return true;
}
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
Type *LoadTy = SomeLoad->getType();
IRB.SetInsertPoint(&PN);
PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
PN.getName() + ".sroa.speculated");
// Get the AA tags and alignment to use from one of the loads. It does not
// matter which one we get and if any differ.
AAMDNodes AATags = SomeLoad->getAAMetadata();
Align Alignment = SomeLoad->getAlign();
// Rewrite all loads of the PN to use the new PHI.
while (!PN.use_empty()) {
LoadInst *LI = cast<LoadInst>(PN.user_back());
LI->replaceAllUsesWith(NewPN);
LI->eraseFromParent();
}
// Inject loads into all of the pred blocks.
DenseMap<BasicBlock*, Value*> InjectedLoads;
for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
BasicBlock *Pred = PN.getIncomingBlock(Idx);
Value *InVal = PN.getIncomingValue(Idx);
// A PHI node is allowed to have multiple (duplicated) entries for the same
// basic block, as long as the value is the same. So if we already injected
// a load in the predecessor, then we should reuse the same load for all
// duplicated entries.
if (Value* V = InjectedLoads.lookup(Pred)) {
NewPN->addIncoming(V, Pred);
continue;
}
Instruction *TI = Pred->getTerminator();
IRB.SetInsertPoint(TI);
LoadInst *Load = IRB.CreateAlignedLoad(
LoadTy, InVal, Alignment,
(PN.getName() + ".sroa.speculate.load." + Pred->getName()));
++NumLoadsSpeculated;
if (AATags)
Load->setAAMetadata(AATags);
NewPN->addIncoming(Load, Pred);
InjectedLoads[Pred] = Load;
}
LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
PN.eraseFromParent();
}
/// Select instructions that use an alloca and are subsequently loaded can be
/// rewritten to load both input pointers and then select between the result,
/// allowing the load of the alloca to be promoted.
/// From this:
/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
/// %V = load i32* %P2
/// to:
/// %V1 = load i32* %Alloca -> will be mem2reg'd
/// %V2 = load i32* %Other
/// %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operands
/// to the select can be loaded unconditionally. If we find an intervening
/// bitcast whose single use is the load, we still allow the promotion.
static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
const DataLayout &DL = SI.getModule()->getDataLayout();
for (User *U : SI.users()) {
LoadInst *LI;
BitCastInst *BC = dyn_cast<BitCastInst>(U);
if (BC && BC->hasOneUse())
LI = dyn_cast<LoadInst>(*BC->user_begin());
else
LI = dyn_cast<LoadInst>(U);
if (!LI || !LI->isSimple())
return false;
// Both operands to the select need to be dereferenceable, either
// absolutely (e.g. allocas) or at this point because we can see other
// accesses to it.
if (!isSafeToLoadUnconditionally(TValue, LI->getType(),
LI->getAlign(), DL, LI))
return false;
if (!isSafeToLoadUnconditionally(FValue, LI->getType(),
LI->getAlign(), DL, LI))
return false;
}
return true;
}
static void speculateSelectInstLoads(IRBuilderTy &IRB, SelectInst &SI) {
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
IRB.SetInsertPoint(&SI);
Value *TV = SI.getTrueValue();
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
LoadInst *LI;
BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
if (BC) {
assert(BC->hasOneUse() && "Bitcast should have a single use.");
LI = cast<LoadInst>(BC->user_back());
} else {
LI = cast<LoadInst>(SI.user_back());
}
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
Value *NewTV =
BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
: TV;
Value *NewFV =
BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
: FV;
LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
LI->getName() + ".sroa.speculate.load.true");
LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
// Transfer alignment and AA info if present.
TL->setAlignment(LI->getAlign());
FL->setAlignment(LI->getAlign());
AAMDNodes Tags = LI->getAAMetadata();
if (Tags) {
TL->setAAMetadata(Tags);
FL->setAAMetadata(Tags);
}
Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
LI->getName() + ".sroa.speculated");
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
if (BC)
BC->eraseFromParent();
}
SI.eraseFromParent();
}
/// Build a GEP out of a base pointer and indices.
///
/// This will return the BasePtr if that is valid, or build a new GEP
/// instruction using the IRBuilder if GEP-ing is needed.
static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
SmallVectorImpl<Value *> &Indices,
const Twine &NamePrefix) {
if (Indices.empty())
return BasePtr;
// A single zero index is a no-op, so check for this and avoid building a GEP
// in that case.
if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
return BasePtr;
// buildGEP() is only called for non-opaque pointers.
return IRB.CreateInBoundsGEP(
BasePtr->getType()->getNonOpaquePointerElementType(), BasePtr, Indices,
NamePrefix + "sroa_idx");
}
/// Get a natural GEP off of the BasePtr walking through Ty toward
/// TargetTy without changing the offset of the pointer.
///
/// This routine assumes we've already established a properly offset GEP with
/// Indices, and arrived at the Ty type. The goal is to continue to GEP with
/// zero-indices down through type layers until we find one the same as
/// TargetTy. If we can't find one with the same type, we at least try to use
/// one with the same size. If none of that works, we just produce the GEP as
/// indicated by Indices to have the correct offset.
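/// For example (illustrative): walking a { [4 x i32] } type toward an i32
/// TargetTy appends a zero index for the struct's first field and another
/// zero index for the array's first element, reaching an i32 without
/// changing the pointer's offset.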
static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
Value *BasePtr, Type *Ty, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
const Twine &NamePrefix) {
if (Ty == TargetTy)
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
// Offset size to use for the indices.
unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
// See if we can descend into a struct and locate a field with the correct
// type.
unsigned NumLayers = 0;
Type *ElementTy = Ty;
do {
if (ElementTy->isPointerTy())
break;
if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
ElementTy = ArrayTy->getElementType();
Indices.push_back(IRB.getIntN(OffsetSize, 0));
} else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
ElementTy = VectorTy->getElementType();
Indices.push_back(IRB.getInt32(0));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
ElementTy = *STy->element_begin();
Indices.push_back(IRB.getInt32(0));
} else {
break;
}
++NumLayers;
} while (ElementTy != TargetTy);
if (ElementTy != TargetTy)
Indices.erase(Indices.end() - NumLayers, Indices.end());
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
/// Get a natural GEP from a base pointer to a particular offset and
/// resulting in a particular type.
///
/// The goal is to produce a "natural" looking GEP that works with the existing
/// composite types to arrive at the appropriate offset and element type for
/// a pointer. TargetTy is the element type the returned GEP should point-to if
/// possible. We recurse by decreasing Offset, adding the appropriate index to
/// Indices, and setting Ty to the result subtype.
///
/// If no natural GEP can be constructed, this function returns null.
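/// For example (illustrative): with an [8 x i32] element type and a byte
/// Offset of 12, DL.getGEPIndicesForOffset selects array element 3 and
/// leaves Offset at zero, so a natural GEP can be built; an Offset of 14
/// would leave a nonzero remainder and the function returns null.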
static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Value *Ptr, APInt Offset, Type *TargetTy,
SmallVectorImpl<Value *> &Indices,
const Twine &NamePrefix) {
PointerType *Ty = cast<PointerType>(Ptr->getType());
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
return nullptr;
Type *ElementTy = Ty->getNonOpaquePointerElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
if (Offset != 0)
return nullptr;
for (const APInt &Index : IntIndices)
Indices.push_back(IRB.getInt(Index));
return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
NamePrefix);
}
/// Compute an adjusted pointer from Ptr by Offset bytes where the
/// resulting pointer has PointerTy.
///
/// This tries very hard to compute a "natural" GEP which arrives at the offset
/// and produces the pointer type desired. Where it cannot, it will try to use
/// the natural GEP to arrive at the offset and bitcast to the type. Where that
/// fails, it will try to use an existing i8* and GEP to the byte offset and
/// bitcast to the type.
///
/// The strategy for finding the more natural GEPs is to peel off layers of the
/// pointer, walking back through bit casts and GEPs, searching for a base
/// pointer from which we can compute a natural GEP with the desired
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
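/// For example (illustrative): given a bitcast of %base to i8* and a request
/// for byte offset 8 as an i32*, the walk peels the bitcast back to %base
/// and tries to fold the offset into the indices of a single natural GEP on
/// %base's type, falling back to a raw i8* GEP plus a bitcast only when no
/// such GEP exists.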
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
const Twine &NamePrefix) {
// Create i8 GEP for opaque pointers.
if (Ptr->getType()->isOpaquePointerTy()) {
if (Offset != 0)
Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
NamePrefix + "sroa_idx");
return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
NamePrefix + "sroa_cast");
}
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
Visited.insert(Ptr);
SmallVector<Value *, 4> Indices;
// We may end up computing an offset pointer that has the wrong type. If we
// never are able to compute one directly that has the correct type, we'll
// fall back to it, so keep it and the base it was computed from around here.
Value *OffsetPtr = nullptr;
Value *OffsetBasePtr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
Type *TargetTy = TargetPtrTy->getNonOpaquePointerElementType();
// As `addrspacecast` is not always a no-op cast, `Ptr` (the storage pointer)
// may have a different address space from the expected `PointerTy` (the
// pointer to be used). Adjust the pointer type based on the original storage
// pointer.
auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
PointerTy = TargetTy->getPointerTo(AS);
do {
// First fold any existing GEPs into the offset.
while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
APInt GEPOffset(Offset.getBitWidth(), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
Offset += GEPOffset;
Ptr = GEP->getPointerOperand();
if (!Visited.insert(Ptr).second)
break;
}
// See if we can perform a natural GEP here.
Indices.clear();
if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
Indices, NamePrefix)) {
// If we have a new natural pointer at the offset, clear out any old
// offset pointer we computed. Unless it is the base pointer or
// a non-instruction, we built a GEP we don't need. Zap it.
if (OffsetPtr && OffsetPtr != OffsetBasePtr)
if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
assert(I->use_empty() && "Built a GEP with uses somehow!");
I->eraseFromParent();
}
OffsetPtr = P;
OffsetBasePtr = Ptr;
// If we also found a pointer of the right type, we're done.
if (P->getType() == PointerTy)
break;
}
// Stash this pointer if we've found an i8*.
if (Ptr->getType()->isIntegerTy(8)) {
Int8Ptr = Ptr;
Int8PtrOffset = Offset;
}
// Peel off a layer of the pointer and update the offset appropriately.
if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
Ptr = cast<Operator>(Ptr)->getOperand(0);
} else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
if (GA->isInterposable())
break;
Ptr = GA->getAliasee();
} else {
break;
}
assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
} while (Visited.insert(Ptr).second);
if (!OffsetPtr) {
if (!Int8Ptr) {
Int8Ptr = IRB.CreateBitCast(
Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
NamePrefix + "sroa_raw_cast");
Int8PtrOffset = Offset;
}
OffsetPtr = Int8PtrOffset == 0
? Int8Ptr
: IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
IRB.getInt(Int8PtrOffset),
NamePrefix + "sroa_raw_idx");
}
Ptr = OffsetPtr;
// On the off chance we were targeting i8*, guard the bitcast here.
if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
TargetPtrTy,
NamePrefix + "sroa_cast");
}
return Ptr;
}
/// Compute the adjusted alignment for a load or store from an offset.
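/// For example, a base alignment of 8 combined with a byte offset of 4
/// yields an alignment of 4, the largest power of two dividing both.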
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
return commonAlignment(getLoadStoreAlignment(I), Offset);
}
/// Test whether we can convert a value from the old to the new type.
///
/// This predicate should be used to guard calls to convertValue in order to
/// ensure that we only try to convert viable values. The strategy is that we
/// will peel off single element struct and array wrappings to get to an
/// underlying value, and convert that value.
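/// For example (assuming 64-bit integral pointers): an i64 and an i8* are
/// mutually convertible, while an i32 and an i64 never are, since integer
/// bit-width differences are rejected outright.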
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
if (OldTy == NewTy)
return true;
// For integer types, we can't handle any bit-width differences. This would
// break both vector conversions with extension and introduce endianness
// issues when used in conjunction with loads and stores.
if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
assert(cast<IntegerType>(OldTy)->getBitWidth() !=
cast<IntegerType>(NewTy)->getBitWidth() &&
"We can't have the same bitwidth for different int types");
return false;
}
if (DL.getTypeSizeInBits(NewTy).getFixedSize() !=
DL.getTypeSizeInBits(OldTy).getFixedSize())
return false;
if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
return false;
// We can convert pointers to integers and vice-versa. Same for vectors
// of pointers and integers.
OldTy = OldTy->getScalarType();
NewTy = NewTy->getScalarType();
if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
unsigned OldAS = OldTy->getPointerAddressSpace();
unsigned NewAS = NewTy->getPointerAddressSpace();
// Convert pointers if they are pointers from the same address space or
// different integral (not non-integral) address spaces with the same
// pointer size.
return OldAS == NewAS ||
(!DL.isNonIntegralAddressSpace(OldAS) &&
!DL.isNonIntegralAddressSpace(NewAS) &&
DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
}
// We can convert integers to integral pointers, but not to non-integral
// pointers.
if (OldTy->isIntegerTy())
return !DL.isNonIntegralPointerType(NewTy);
// We can convert integral pointers to integers, but non-integral pointers
// need to remain pointers.
if (!DL.isNonIntegralPointerType(OldTy))
return NewTy->isIntegerTy();
return false;
}
return true;
}
/// Generic routine to convert an SSA value to a value of a different
/// type.
///
/// This will try various different casting techniques, such as bitcasts,
/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
/// two types for viability with this routine.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
Type *NewTy) {
Type *OldTy = V->getType();
assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
if (OldTy == NewTy)
return V;
assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
"Integer types must be the exact same to convert.");
// See if we need inttoptr for this type pair. May require additional bitcast.
if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
// Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
// Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
// Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
// Directly handle i64 to i8*
return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
NewTy);
}
// See if we need ptrtoint for this type pair. May require additional bitcast.
if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
// Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
// Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
// Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
// Expand i8* to i64 --> i8* to i64 to i64
return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
NewTy);
}
if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
unsigned OldAS = OldTy->getPointerAddressSpace();
unsigned NewAS = NewTy->getPointerAddressSpace();
// To convert pointers between different address spaces (they have already
// been checked to be convertible, i.e. they have the same pointer size), we
// cannot currently use `bitcast` (which is restricted to the same address
// space) or `addrspacecast` (which is not always a no-op cast). Instead,
// use a pair of no-op `ptrtoint`/`inttoptr` casts through an integer with
// the same bit size.
if (OldAS != NewAS) {
assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
NewTy);
}
}
return IRB.CreateBitCast(V, NewTy);
}
/// Test whether the given slice use can be promoted to a vector.
///
/// This function is called to test each entry in a partition which is slated
/// for a single slice.
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
VectorType *Ty,
uint64_t ElementSize,
const DataLayout &DL) {
// First validate the slice offsets.
uint64_t BeginOffset =
std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
uint64_t BeginIndex = BeginOffset / ElementSize;
if (BeginIndex * ElementSize != BeginOffset ||
BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
return false;
uint64_t EndOffset =
std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
uint64_t EndIndex = EndOffset / ElementSize;
if (EndIndex * ElementSize != EndOffset ||
EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
return false;
assert(EndIndex > BeginIndex && "Empty vector!");
uint64_t NumElements = EndIndex - BeginIndex;
Type *SliceTy = (NumElements == 1)
? Ty->getElementType()
: FixedVectorType::get(Ty->getElementType(), NumElements);
Type *SplitIntTy =
Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
Use *U = S.getUse();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile())
return false;
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
// Disable vector promotion when there are loads or stores of an FCA.
if (LTy->isStructTy())
return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
}
if (!canConvertValue(DL, SliceTy, LTy))
return false;
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
// Disable vector promotion when there are loads or stores of an FCA.
if (STy->isStructTy())
return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
}
if (!canConvertValue(DL, STy, SliceTy))
return false;
} else {
return false;
}
return true;
}
/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into a vector alloca with only
/// whole-vector loads and stores such that it could be promoted to a vector
/// SSA value. We only can ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
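/// For example (illustrative): a 16-byte partition accessed by whole
/// <4 x i32> loads and stores plus one i32 store at offset 8 is viable; the
/// narrow store maps exactly onto element 2 of the vector.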
static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Collect the candidate types for vector-based promotion. Also track whether
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
Type *CommonEltTy = nullptr;
bool HaveCommonEltTy = true;
auto CheckCandidateType = [&](Type *Ty) {
if (auto *VTy = dyn_cast<VectorType>(Ty)) {
// Bail out if the candidate vector types differ in total size in bits.
if (!CandidateTys.empty()) {
VectorType *V = CandidateTys[0];
if (DL.getTypeSizeInBits(VTy).getFixedSize() !=
DL.getTypeSizeInBits(V).getFixedSize()) {
CandidateTys.clear();
return;
}
}
CandidateTys.push_back(VTy);
if (!CommonEltTy)
CommonEltTy = VTy->getElementType();
else if (CommonEltTy != VTy->getElementType())
HaveCommonEltTy = false;
}
};
// Consider any loads or stores that are the exact size of the slice.
for (const Slice &S : P)
if (S.beginOffset() == P.beginOffset() &&
S.endOffset() == P.endOffset()) {
if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
CheckCandidateType(LI->getType());
else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
CheckCandidateType(SI->getValueOperand()->getType());
}
// If we didn't find a vector type, nothing to do here.
if (CandidateTys.empty())
return nullptr;
// Remove non-integer vector types if we had multiple common element types.
// FIXME: It'd be nice to replace them with integer vector types, but we can't
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
llvm::erase_if(CandidateTys, [](VectorType *VTy) {
return !VTy->getElementType()->isIntegerTy();
});
// If there were no integer vector types, give up.
if (CandidateTys.empty())
return nullptr;
// Rank the remaining candidate vector types. This is easy because we know
// they're all integer vectors. We sort by ascending number of elements.
auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
(void)DL;
assert(DL.getTypeSizeInBits(RHSTy).getFixedSize() ==
DL.getTypeSizeInBits(LHSTy).getFixedSize() &&
"Cannot have vector types of different sizes!");
assert(RHSTy->getElementType()->isIntegerTy() &&
"All non-integer types eliminated!");
assert(LHSTy->getElementType()->isIntegerTy() &&
"All non-integer types eliminated!");
return cast<FixedVectorType>(RHSTy)->getNumElements() <
cast<FixedVectorType>(LHSTy)->getNumElements();
};
llvm::sort(CandidateTys, RankVectorTypes);
CandidateTys.erase(
std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
CandidateTys.end());
} else {
// The only way to have the same element type in every vector type is to
// have the same vector type. Check that and remove all but one.
#ifndef NDEBUG
for (VectorType *VTy : CandidateTys) {
assert(VTy->getElementType() == CommonEltTy &&
"Unaccounted for element type!");
assert(VTy == CandidateTys[0] &&
"Different vector types with the same element type!");
}
#endif
CandidateTys.resize(1);
}
// Try each vector type, and return the one which works.
auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
uint64_t ElementSize =
DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize();
// While LLVM vectors are defined as bit-packed, we don't support element
// sizes that aren't byte-sized.
if (ElementSize % 8)
return false;
assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 &&
"vector size not a multiple of element size?");
ElementSize /= 8;
for (const Slice &S : P)
if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
return false;
for (const Slice *S : P.splitSliceTails())
if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
return false;
return true;
};
for (VectorType *VTy : CandidateTys)
if (CheckVectorTypeForPromotion(VTy))
return VTy;
return nullptr;
}
/// Test whether a slice of an alloca is valid for integer widening.
///
/// This implements the necessary checking for the \c isIntegerWideningViable
/// test below on a single slice of the alloca.
static bool isIntegerWideningViableForSlice(const Slice &S,
uint64_t AllocBeginOffset,
Type *AllocaTy,
const DataLayout &DL,
bool &WholeAllocaOp) {
uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedSize();
uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
Use *U = S.getUse();
// Lifetime intrinsics operate over the whole alloca, so their slices are
// usually larger than other load/store slices (RelEnd > Size). But lifetime
// intrinsics are always promotable and should not impact the promotability
// of the other slices in the partition.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (II->isLifetimeStartOrEnd() || II->isDroppable())
return true;
}
// We can't reasonably handle cases where the load or store extends past
// the end of the alloca's type and into its padding.
if (RelEnd > Size)
return false;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
// We can't handle loads that extend past the allocated memory.
if (DL.getTypeStoreSize(LI->getType()).getFixedSize() > Size)
return false;
// So far, AllocaSliceRewriter does not support widening split slice tails
// in rewriteIntegerLoad.
if (S.beginOffset() < AllocBeginOffset)
return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
return false;
} else if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(DL, AllocaTy, LI->getType())) {
// Non-integer loads need to be convertible from the alloca type so that
// they are promotable.
return false;
}
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
Type *ValueTy = SI->getValueOperand()->getType();
if (SI->isVolatile())
return false;
// We can't handle stores that extend past the allocated memory.
if (DL.getTypeStoreSize(ValueTy).getFixedSize() > Size)
return false;
// So far, AllocaSliceRewriter does not support widening split slice tails
// in rewriteIntegerStore.
if (S.beginOffset() < AllocBeginOffset)
return false;
// Note that we don't count vector loads or stores as whole-alloca
// operations which enable integer widening because we would prefer to use
// vector widening instead.
if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
WholeAllocaOp = true;
if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
return false;
} else if (RelBegin != 0 || RelEnd != Size ||
!canConvertValue(DL, ValueTy, AllocaTy)) {
// Non-integer stores need to be convertible to the alloca type so that
// they are promotable.
return false;
}
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
return false;
if (!S.isSplittable())
return false; // Skip any unsplittable intrinsics.
} else {
return false;
}
return true;
}
/// Test whether the given alloca partition's integer operations can be
/// widened to promotable ones.
///
/// This is a quick test to check whether we can rewrite the integer loads and
/// stores to a particular alloca into wider loads and stores and be able to
/// promote the resulting alloca.
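/// For example (illustrative): an i64 alloca accessed by one whole i64 load
/// and two i32 stores at offsets 0 and 4 is viable; the load provides the
/// covering whole-alloca operation, and each i32 store can be rewritten as
/// a load-modify-store of the full i64.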
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
const DataLayout &DL) {
uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedSize();
// Don't create integer types larger than the maximum bitwidth.
if (SizeInBits > IntegerType::MAX_INT_BITS)
return false;
// Don't try to handle allocas with bit-padding.
if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedSize())
return false;
// We need to ensure that an integer type with the appropriate bitwidth can
// be converted to the alloca type, whatever that is. We don't want to force
// the alloca itself to have an integer type if there is a more suitable one.
Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
if (!canConvertValue(DL, AllocaTy, IntTy) ||
!canConvertValue(DL, IntTy, AllocaTy))
return false;
// While examining uses, we ensure that the alloca has a covering load or
// store. We don't want to widen the integer operations only to fail to
// promote due to some other unsplittable entry (which we may make splittable
// later). However, if there are only splittable uses, go ahead and assume
// that we cover the alloca.
// FIXME: We shouldn't consider split slices that happen to start in the
// partition here...
bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
for (const Slice &S : P)
if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
WholeAllocaOp))
return false;
for (const Slice *S : P.splitSliceTails())
if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
WholeAllocaOp))
return false;
return WholeAllocaOp;
}
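/// Extract a narrower integer of type Ty from the wider integer value V at
/// byte Offset, shifting and truncating as needed. The shift amount is
/// mirrored on big-endian targets so that Offset is always a byte offset
/// from the start of the value as laid out in memory.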
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
IntegerType *Ty, uint64_t Offset,
const Twine &Name) {
LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
IntegerType *IntTy = cast<IntegerType>(V->getType());
assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <=
DL.getTypeStoreSize(IntTy).getFixedSize() &&
"Element extends past full value");
uint64_t ShAmt = 8 * Offset;
if (DL.isBigEndian())
ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
if (ShAmt) {
V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
}
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot extract to a larger integer!");
if (Ty != IntTy) {
V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
}
return V;
}
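/// Insert the integer value V into the wider integer Old at byte Offset,
/// the counterpart of extractInteger above: V is zero-extended, shifted
/// into position with the same endianness handling, and merged into Old
/// with a mask-and-or sequence.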
static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
Value *V, uint64_t Offset, const Twine &Name) {
IntegerType *IntTy = cast<IntegerType>(Old->getType());
IntegerType *Ty = cast<IntegerType>(V->getType());
assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
"Cannot insert a larger integer!");
LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
if (Ty != IntTy) {
V = IRB.CreateZExt(V, IntTy, Name + ".ext");
LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
}
assert(DL.getTypeStoreSize(Ty).getFixedSize() + Offset <=
DL.getTypeStoreSize(IntTy).getFixedSize() &&
"Element store outside of alloca store");
uint64_t ShAmt = 8 * Offset;
if (DL.isBigEndian())
ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
if (ShAmt) {
V = IRB.CreateShl(V, ShAmt, Name + ".shift");
LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
}
if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
V = IRB.CreateOr(Old, V, Name + ".insert");
LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
}
return V;
}
static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
unsigned EndIndex, const Twine &Name) {
auto *VecTy = cast<FixedVectorType>(V->getType());
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
if (NumElements == VecTy->getNumElements())
return V;
if (NumElements == 1) {
V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
Name + ".extract");
LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
return V;
}
auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
return V;
}
static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
unsigned BeginIndex, const Twine &Name) {
VectorType *VecTy = cast<VectorType>(Old->getType());
assert(VecTy && "Can only insert a vector into a vector");
VectorType *Ty = dyn_cast<VectorType>(V->getType());
if (!Ty) {
// Single element to insert.
V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
Name + ".insert");
LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
return V;
}
assert(cast<FixedVectorType>(Ty)->getNumElements() <=
cast<FixedVectorType>(VecTy)->getNumElements() &&
"Too many elements!");
if (cast<FixedVectorType>(Ty)->getNumElements() ==
cast<FixedVectorType>(VecTy)->getNumElements()) {
assert(V->getType() == VecTy && "Vector type mismatch");
return V;
}
unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
// When inserting a smaller vector into the larger one to store, we first
// use a shuffle vector to widen it with undef elements, and then a select
// with a constant mask to choose between the loaded vector and the incoming
// vector.
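// For example (illustrative): inserting a <2 x i32> at BeginIndex 1 into a
// <4 x i32> uses the widening mask <-1, 0, 1, -1> and then the select mask
// <0, 1, 1, 0>, taking lanes 1-2 from the new vector and lanes 0 and 3 from
// the old one.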
SmallVector<int, 8> Mask;
Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
if (i >= BeginIndex && i < EndIndex)
Mask.push_back(i - BeginIndex);
else
Mask.push_back(-1);
V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
SmallVector<Constant *, 8> Mask2;
Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
V = IRB.CreateSelect(ConstantVector::get(Mask2), V, Old, Name + "blend");
LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
return V;
}
/// Visitor to rewrite instructions using a particular slice of an alloca
/// to use a new alloca.
///
/// Also implements the rewriting to vector-based accesses when the partition
/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
/// lives here.
class llvm::sroa::AllocaSliceRewriter
: public InstVisitor<AllocaSliceRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class InstVisitor<AllocaSliceRewriter, bool>;
using Base = InstVisitor<AllocaSliceRewriter, bool>;
const DataLayout &DL;
AllocaSlices &AS;
SROAPass &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
// This is a convenience and flag variable that will be null unless the new
// alloca's integer operations should be widened to this integer type due to
// passing isIntegerWideningViable above. If it is non-null, the desired
// integer type will be stored here for easy access during rewriting.
IntegerType *IntTy;
// If we are rewriting an alloca partition which can be written as pure
// vector operations, we stash extra information here. When VecTy is
// non-null, we have some strict guarantees about the rewritten alloca:
// - The new alloca is exactly the size of the vector type here.
// - The accesses all either map to the entire vector or to a single
// element.
// - The set of accessing instructions is only one of those handled above
// in isVectorPromotionViable. Generally these are the same access kinds
// which are promotable via mem2reg.
VectorType *VecTy;
Type *ElementTy;
uint64_t ElementSize;
// The original offset of the slice currently being rewritten relative to
// the original alloca.
uint64_t BeginOffset = 0;
uint64_t EndOffset = 0;
// The new offsets of the slice currently being rewritten relative to the
// original alloca.
uint64_t NewBeginOffset = 0, NewEndOffset = 0;
uint64_t SliceSize = 0;
bool IsSplittable = false;
bool IsSplit = false;
Use *OldUse = nullptr;
Instruction *OldPtr = nullptr;
// Track post-rewrite users which are PHI nodes and Selects.
SmallSetVector<PHINode *, 8> &PHIUsers;
SmallSetVector<SelectInst *, 8> &SelectUsers;
// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
IRBuilderTy IRB;
public:
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
VectorType *PromotableVecTy,
SmallSetVector<PHINode *, 8> &PHIUsers,
SmallSetVector<SelectInst *, 8> &SelectUsers)
: DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
IntTy(
IsIntegerPromotable
? Type::getIntNTy(NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType())
.getFixedSize())
: nullptr),
VecTy(PromotableVecTy),
ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8
: 0),
PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
if (VecTy) {
assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 &&
"Only multiple-of-8 sized vector elements are viable");
++NumVectorized;
}
assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
}
bool visit(AllocaSlices::const_iterator I) {
bool CanSROA = true;
BeginOffset = I->beginOffset();
EndOffset = I->endOffset();
IsSplittable = I->isSplittable();
IsSplit =
BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
LLVM_DEBUG(dbgs() << "\n");
// Compute the intersecting offset range.
assert(BeginOffset < NewAllocaEndOffset);
assert(EndOffset > NewAllocaBeginOffset);
NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
SliceSize = NewEndOffset - NewBeginOffset;
OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());
Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
IRB.SetInsertPoint(OldUserI);
IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
IRB.getInserter().SetNamePrefix(
Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
if (VecTy || IntTy)
assert(CanSROA);
return CanSROA;
}
private:
// Make sure the other visit overloads are visible.
using Base::visit;
// Every instruction which can end up as a user must have a rewrite rule.
bool visitInstruction(Instruction &I) {
LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
llvm_unreachable("No rewrite rule for this instruction!");
}
Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
// Note that the offset computation can use BeginOffset or NewBeginOffset
// interchangeably for unsplit slices.
assert(IsSplit || BeginOffset == NewBeginOffset);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
#ifndef NDEBUG
StringRef OldName = OldPtr->getName();
// Skip through the last '.sroa.' component of the name.
size_t LastSROAPrefix = OldName.rfind(".sroa.");
if (LastSROAPrefix != StringRef::npos) {
OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
// Look for an SROA slice index.
size_t IndexEnd = OldName.find_first_not_of("0123456789");
if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
// Strip the index and look for the offset.
OldName = OldName.substr(IndexEnd + 1);
size_t OffsetEnd = OldName.find_first_not_of("0123456789");
if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
// Strip the offset.
OldName = OldName.substr(OffsetEnd + 1);
}
}
// Strip any SROA suffixes as well.
OldName = OldName.substr(0, OldName.find(".sroa_"));
#endif
return getAdjustedPtr(IRB, DL, &NewAI,
APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
PointerTy,
#ifndef NDEBUG
Twine(OldName) + "."
#else
Twine()
#endif
);
}
/// Compute a suitable alignment to access this slice of the *new*
/// alloca.
Align getSliceAlign() {
return commonAlignment(NewAI.getAlign(),
NewBeginOffset - NewAllocaBeginOffset);
}
unsigned getIndex(uint64_t Offset) {
assert(VecTy && "Can only call getIndex when rewriting a vector");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
uint32_t Index = RelOffset / ElementSize;
assert(Index * ElementSize == RelOffset);
return Index;
}
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
Pass.DeadInsts.push_back(I);
}
Value *rewriteVectorizedLoadInst(LoadInst &LI) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
}
Value *rewriteIntegerLoad(LoadInst &LI) {
assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
V = convertValue(DL, IRB, V, IntTy);
assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
}
// It is possible that the extracted type is not the load type. This
// happens if there is a load past the end of the alloca, and as
// a consequence the slice is narrower but still a candidate for integer
// lowering. To handle this case, we just zero extend the extracted
// integer.
assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
"Can only handle an extract for an overly wide load");
if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
V = IRB.CreateZExt(V, LI.getType());
return V;
}
bool visitLoadInst(LoadInst &LI) {
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
AAMDNodes AATags = LI.getAAMetadata();
unsigned AS = LI.getPointerAddressSpace();
Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
: LI.getType();
const bool IsLoadPastEnd =
DL.getTypeStoreSize(TargetTy).getFixedSize() > SliceSize;
bool IsPtrAdjusted = false;
Value *V;
if (VecTy) {
V = rewriteVectorizedLoadInst(LI);
} else if (IntTy && LI.getType()->isIntegerTy()) {
V = rewriteIntegerLoad(LI);
} else if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
(canConvertValue(DL, NewAllocaTy, TargetTy) ||
(IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
TargetTy->isIntegerTy()))) {
LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), LI.isVolatile(),
LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
if (NewLI->isAtomic())
NewLI->setAlignment(LI.getAlign());
// Any !nonnull metadata or !range metadata on the old load is also valid
// on the new load. This is true in some cases even when the loads have
// different types, for example by mapping !nonnull metadata to !range
// metadata by modeling the null pointer constant converted to the
// integer type.
// FIXME: Add support for range metadata here. Currently the utilities
// for this don't propagate range metadata in trivial cases from one
// integer load to another, don't handle non-addrspace-0 null pointers
// correctly, and don't have any support for mapping ranges as the
// integer type becomes wider or narrower.
if (MDNode *N = LI.getMetadata(LLVMContext::MD_nonnull))
copyNonnullMetadata(LI, N, *NewLI);
// Try to preserve nonnull metadata
V = NewLI;
// If this is an integer load past the end of the slice (which means the
// bytes outside the slice are undef or this load is dead) just forcibly
// fix the integer size with correct handling of endianness.
if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
if (AITy->getBitWidth() < TITy->getBitWidth()) {
V = IRB.CreateZExt(V, TITy, "load.ext");
if (DL.isBigEndian())
V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
"endian_shift");
}
} else {
Type *LTy = TargetTy->getPointerTo(AS);
LoadInst *NewLI =
IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
getSliceAlign(), LI.isVolatile(), LI.getName());
if (AATags)
NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (LI.isVolatile())
NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
V = NewLI;
IsPtrAdjusted = true;
}
V = convertValue(DL, IRB, V, TargetTy);
if (IsSplit) {
assert(!LI.isVolatile());
assert(LI.getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedSize() &&
"Split load isn't smaller than original load");
assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
"Non-byte-multiple bit width");
// Move the insertion point just past the load so that we can refer to it.
IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI)));
// Create a placeholder value with the same type as LI to use as the
// basis for the new value. This allows us to replace the uses of LI with
// the computed value, and then replace the placeholder with LI, leaving
// LI only used for this computation.
Value *Placeholder = new LoadInst(
LI.getType(), PoisonValue::get(LI.getType()->getPointerTo(AS)), "",
false, Align(1));
V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
"insert");
LI.replaceAllUsesWith(V);
Placeholder->replaceAllUsesWith(&LI);
Placeholder->deleteValue();
} else {
LI.replaceAllUsesWith(V);
}
Pass.DeadInsts.push_back(&LI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
return !LI.isVolatile() && !IsPtrAdjusted;
}
bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
AAMDNodes AATags) {
if (V->getType() != VecTy) {
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
"Too many elements!");
Type *SliceTy = (NumElements == 1)
? ElementTy
: FixedVectorType::get(ElementTy, NumElements);
if (V->getType() != SliceTy)
V = convertValue(DL, IRB, V, SliceTy);
// Mix in the existing elements.
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
V = insertVector(IRB, Old, V, BeginIndex, "vec");
}
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
if (DL.getTypeSizeInBits(V->getType()).getFixedSize() !=
IntTy->getBitWidth()) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
bool visitStoreInst(StoreInst &SI) {
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
AAMDNodes AATags = SI.getAAMetadata();
Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after promoting this alloca.
if (V->getType()->isPointerTy())
if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
Pass.PostPromotionWorklist.insert(AI);
if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedSize()) {
assert(!SI.isVolatile());
assert(V->getType()->isIntegerTy() &&
"Only integer type loads and stores are split");
assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
"Non-byte-multiple bit width");
IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
"extract");
}
if (VecTy)
return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
if (IntTy && V->getType()->isIntegerTy())
return rewriteIntegerStore(V, SI, AATags);
const bool IsStorePastEnd =
DL.getTypeStoreSize(V->getType()).getFixedSize() > SliceSize;
StoreInst *NewSI;
if (NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset &&
(canConvertValue(DL, V->getType(), NewAllocaTy) ||
(IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
V->getType()->isIntegerTy()))) {
// If this is an integer store past the end of slice (and thus the bytes
// past that point are irrelevant or this is unreachable), truncate the
// value prior to storing.
if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
if (VITy->getBitWidth() > AITy->getBitWidth()) {
if (DL.isBigEndian())
V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
"endian_shift");
V = IRB.CreateTrunc(V, AITy, "load.trunc");
}
V = convertValue(DL, IRB, V, NewAllocaTy);
NewSI =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), SI.isVolatile());
} else {
unsigned AS = SI.getPointerAddressSpace();
Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
NewSI =
IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
}
NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
if (NewSI->isAtomic())
NewSI->setAlignment(SI.getAlign());
Pass.DeadInsts.push_back(&SI);
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
return NewSI->getPointerOperand() == &NewAI &&
NewSI->getValueOperand()->getType() == NewAllocaTy &&
!SI.isVolatile();
}
/// Compute an integer value from splatting an i8 across the given
/// number of bytes.
///
/// Note that this routine assumes an i8 is a byte. If that isn't true, don't
/// call this routine.
/// FIXME: Heed the advice above.
///
/// \param V The i8 value to splat.
/// \param Size The number of bytes in the output (assuming i8 is one byte)
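/// For example, splatting the byte 0xAB across 4 bytes computes
/// 0xAB * (0xFFFFFFFF u/ 0xFF) == 0xAB * 0x01010101 == 0xABABABAB.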
Value *getIntegerSplat(Value *V, unsigned Size) {
assert(Size > 0 && "Expected a positive number of bytes.");
IntegerType *VTy = cast<IntegerType>(V->getType());
assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
if (Size == 1)
return V;
Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
V = IRB.CreateMul(
IRB.CreateZExt(V, SplatIntTy, "zext"),
IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
SplatIntTy)),
"isplat");
return V;
}
/// Compute a vector splat for a given element value.
Value *getVectorSplat(Value *V, unsigned NumElements) {
V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
return V;
}
bool visitMemSetInst(MemSetInst &II) {
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
AAMDNodes AATags = II.getAAMetadata();
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
if (!isa<ConstantInt>(II.getLength())) {
assert(!IsSplit);
assert(NewBeginOffset == BeginOffset);
II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
II.setDestAlignment(getSliceAlign());
deleteIfTriviallyDead(OldPtr);
return false;
}
// Record this instruction for deletion.
Pass.DeadInsts.push_back(&II);
Type *AllocaTy = NewAI.getAllocatedType();
Type *ScalarTy = AllocaTy->getScalarType();
const bool CanContinue = [&]() {
if (VecTy || IntTy)
return true;
if (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)
return false;
// Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
const uint64_t Len = C->getLimitedValue();
if (Len > std::numeric_limits<unsigned>::max())
return false;
auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
return canConvertValue(DL, SrcTy, AllocaTy) &&
DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedSize());
}();
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memset.
if (!CanContinue) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
CallInst *New = IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile());
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
// If we can represent this as a simple value, we have to build the actual
// value to store, which requires expanding the byte present in memset to
// a sensible representation for the alloca type. This is essentially
// splatting the byte to a sufficiently wide integer, splatting it across
// any desired vector width, and bitcasting to the final type.
Value *V;
if (VecTy) {
// If this is a memset of a vectorized alloca, insert it.
assert(ElementTy == ScalarTy);
unsigned BeginIndex = getIndex(NewBeginOffset);
unsigned EndIndex = getIndex(NewEndOffset);
assert(EndIndex > BeginIndex && "Empty vector!");
unsigned NumElements = EndIndex - BeginIndex;
assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
"Too many elements!");
Value *Splat = getIntegerSplat(
II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8);
Splat = convertValue(DL, IRB, Splat, ElementTy);
if (NumElements > 1)
Splat = getVectorSplat(Splat, NumElements);
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
} else if (IntTy) {
// If this is a memset on an alloca where we can widen stores, insert the
// set integer.
assert(!II.isVolatile());
uint64_t Size = NewEndOffset - NewBeginOffset;
V = getIntegerSplat(II.getValue(), Size);
if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
EndOffset != NewAllocaEndOffset)) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
V = insertInteger(DL, IRB, Old, V, Offset, "insert");
} else {
assert(V->getType() == IntTy &&
"Wrong type for an alloca wide integer!");
}
V = convertValue(DL, IRB, V, AllocaTy);
} else {
// Established these invariants above.
assert(NewBeginOffset == NewAllocaBeginOffset);
assert(NewEndOffset == NewAllocaEndOffset);
V = getIntegerSplat(II.getValue(),
DL.getTypeSizeInBits(ScalarTy).getFixedSize() / 8);
if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
V = getVectorSplat(
V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
V = convertValue(DL, IRB, V, AllocaTy);
}
StoreInst *New =
IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), II.isVolatile());
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
}
bool visitMemTransferInst(MemTransferInst &II) {
// Rewriting of memory transfer instructions can be a bit tricky. We break
// them into two categories: split intrinsics and unsplit intrinsics.
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
AAMDNodes AATags = II.getAAMetadata();
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
(!IsDest && II.getRawSource() == OldPtr));
Align SliceAlign = getSliceAlign();
// For unsplit intrinsics, we simply modify the source and destination
// pointers in place. This isn't just an optimization, it is a matter of
// correctness. With unsplit intrinsics we may be dealing with transfers
// within a single alloca before SROA ran, or with transfers that have
// a variable length. We may also be dealing with memmove instead of
// memcpy, and so simply updating the pointers in place is the only way for
// us to update both the source and dest of a single call.
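// For example (illustrative): an unsplit memmove that copies bytes [0, 6)
// of an alloca over bytes [2, 8) of the same alloca overlaps itself;
// rewriting it as anything other than a single retargeted call could read
// bytes that have already been overwritten.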
if (!IsSplittable) {
Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (IsDest) {
II.setDest(AdjustedPtr);
II.setDestAlignment(SliceAlign);
} else {
II.setSource(AdjustedPtr);
II.setSourceAlignment(SliceAlign);
}
LLVM_DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldPtr);
return false;
}
// For split transfer intrinsics we have an incredibly useful assurance:
// the source and destination do not reside within the same alloca, and at
// least one of them does not escape. This means that we can replace
// memmove with memcpy, and we don't need to worry about all manner of
// downsides to splitting and transforming the operations.
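// For example (illustrative): once split, a memmove between two distinct
// allocas cannot overlap, so each piece can safely become an independent
// load and store (or memcpy) without changing the original semantics.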
// If this doesn't map cleanly onto the alloca type, and that type isn't
// a single value type, just emit a memcpy.
bool EmitMemCpy =
!VecTy && !IntTy &&
(BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
SliceSize !=
DL.getTypeStoreSize(NewAI.getAllocatedType()).getFixedSize() ||
!NewAI.getAllocatedType()->isSingleValueType());
// If we're just going to emit a memcpy, the alloca hasn't changed, and the
// size hasn't been shrunk based on analysis of the viable range, this is
// a no-op.
if (EmitMemCpy && &OldAI == &NewAI) {
// Ensure the start lines up.
assert(NewBeginOffset == BeginOffset);
// Rewrite the size as needed.
if (NewEndOffset != EndOffset)
II.setLength(ConstantInt::get(II.getLength()->getType(),
NewEndOffset - NewBeginOffset));
return false;
}
// Record this instruction for deletion.
Pass.DeadInsts.push_back(&II);
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
if (AllocaInst *AI =
dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
assert(AI != &OldAI && AI != &NewAI &&
"Splittable transfers cannot reach the same alloca on both ends.");
Pass.Worklist.insert(AI);
}
Type *OtherPtrTy = OtherPtr->getType();
unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
// Compute the relative offset for the other pointer within the transfer.
unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
Align OtherAlign =
(IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
OtherAlign =
commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
Value *DestPtr, *SrcPtr;
MaybeAlign DestAlign, SrcAlign;
// Note: IsDest is true iff we're copying into the new alloca slice
if (IsDest) {
DestPtr = OurPtr;
DestAlign = SliceAlign;
SrcPtr = OtherPtr;
SrcAlign = OtherAlign;
} else {
DestPtr = OtherPtr;
DestAlign = OtherAlign;
SrcPtr = OurPtr;
SrcAlign = SliceAlign;
}
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
Size, II.isVolatile());
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
NewEndOffset == NewAllocaEndOffset;
uint64_t Size = NewEndOffset - NewBeginOffset;
unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy =
IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
Type *OtherTy;
if (VecTy && !IsWholeAlloca) {
if (NumElements == 1)
OtherTy = VecTy->getElementType();
else
OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
} else if (IntTy && !IsWholeAlloca) {
OtherTy = SubIntTy;
} else {
OtherTy = NewAllocaTy;
}
OtherPtrTy = OtherTy->getPointerTo(OtherAS);
Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
MaybeAlign SrcAlign = OtherAlign;
Value *DstPtr = &NewAI;
MaybeAlign DstAlign = SliceAlign;
if (!IsDest) {
std::swap(SrcPtr, DstPtr);
std::swap(SrcAlign, DstAlign);
}
Value *Src;
if (VecTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
} else if (IntTy && !IsWholeAlloca && !IsDest) {
Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "load");
Src = convertValue(DL, IRB, Src, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
} else {
LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
II.isVolatile(), "copyload");
Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
Src = Load;
}
if (VecTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
} else if (IntTy && !IsWholeAlloca && IsDest) {
Value *Old = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI,
NewAI.getAlign(), "oldload");
Old = convertValue(DL, IRB, Old, IntTy);
uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
Src = convertValue(DL, IRB, Src, NewAllocaTy);
}
StoreInst *Store = cast<StoreInst>(
IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
bool visitIntrinsicInst(IntrinsicInst &II) {
assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
"Unexpected intrinsic!");
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
// Record this instruction for deletion.
Pass.DeadInsts.push_back(&II);
if (II.isDroppable()) {
assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
// TODO For now we forget assumed information, this can be improved.
OldPtr->dropDroppableUsesIn(II);
return true;
}
assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
// Therefore, we drop lifetime intrinsics which don't cover the whole
// alloca.
// (In theory, intrinsics which partially cover an alloca could be
// promoted, but PromoteMemToReg doesn't handle that case.)
// FIXME: Check whether the alloca is promotable before dropping the
// lifetime intrinsics?
if (NewBeginOffset != NewAllocaBeginOffset ||
NewEndOffset != NewAllocaEndOffset)
return true;
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
// Lifetime intrinsics always expect an i8* so directly get such a pointer
// for the new alloca slice.
Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
else
New = IRB.CreateLifetimeEnd(Ptr, Size);
(void)New;
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}
void fixLoadStoreAlign(Instruction &Root) {
// This algorithm implements the same visitor loop as
// hasUnsafePHIOrSelectUse, and fixes the alignment of each load
// or store found.
SmallPtrSet<Instruction *, 4> Visited;
SmallVector<Instruction *, 4> Uses;
Visited.insert(&Root);
Uses.push_back(&Root);
do {
Instruction *I = Uses.pop_back_val();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
continue;
}
assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
isa<PHINode>(I) || isa<SelectInst>(I) ||
isa<GetElementPtrInst>(I));
for (User *U : I->users())
if (Visited.insert(cast<Instruction>(U)).second)
Uses.push_back(cast<Instruction>(U));
} while (!Uses.empty());
}
bool visitPHINode(PHINode &PN) {
LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
// We would like to compute a new pointer in only one place, but have it be
// as local as possible to the PHI. To do that, we re-use the location of
// the old pointer, which necessarily must be in the right position to
// dominate the PHI.
IRBuilderBase::InsertPointGuard Guard(IRB);
if (isa<PHINode>(OldPtr))
IRB.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt());
else
IRB.SetInsertPoint(OldPtr);
IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Replace the operands which were using the old pointer.
std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
// Fix the alignment of any loads or stores using this PHI node.
fixLoadStoreAlign(PN);
// PHIs can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
PHIUsers.insert(&PN);
return true;
}
bool visitSelectInst(SelectInst &SI) {
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
"Pointer isn't an operand!");
assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
// Replace the operands which were using the old pointer.
if (SI.getOperand(1) == OldPtr)
SI.setOperand(1, NewPtr);
if (SI.getOperand(2) == OldPtr)
SI.setOperand(2, NewPtr);
LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldPtr);
// Fix the alignment of any loads or stores using this select.
fixLoadStoreAlign(SI);
// Selects can't be promoted on their own, but often can be speculated. We
// check the speculation outside of the rewriter so that we see the
// fully-rewritten alloca.
SelectUsers.insert(&SI);
return true;
}
};
namespace {
/// Visitor to rewrite aggregate loads and stores as scalar.
///
/// This pass aggressively rewrites all aggregate loads and stores on
/// a particular pointer (or any pointer derived from it which we can identify)
/// with scalar loads and stores.
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
// Befriend the base class so it can delegate to private visit methods.
friend class InstVisitor<AggLoadStoreRewriter, bool>;
/// Queue of pointer uses to analyze and potentially rewrite.
SmallVector<Use *, 8> Queue;
/// Set to prevent us from cycling with phi nodes and loops.
SmallPtrSet<User *, 8> Visited;
/// The current pointer use being rewritten. This is used to dig up the used
/// value (as opposed to the user).
Use *U = nullptr;
/// Used to calculate offsets, and hence alignment, of subobjects.
const DataLayout &DL;
IRBuilderTy &IRB;
public:
AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
: DL(DL), IRB(IRB) {}
/// Rewrite loads and stores through a pointer and all pointers derived from
/// it.
bool rewrite(Instruction &I) {
LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
enqueueUsers(I);
bool Changed = false;
while (!Queue.empty()) {
U = Queue.pop_back_val();
Changed |= visit(cast<Instruction>(U->getUser()));
}
return Changed;
}
private:
/// Enqueue all the users of the given instruction for further processing.
/// This uses a set to de-duplicate users.
void enqueueUsers(Instruction &I) {
for (Use &U : I.uses())
if (Visited.insert(U.getUser()).second)
Queue.push_back(&U);
}
// Conservative default is to not rewrite anything.
bool visitInstruction(Instruction &I) { return false; }
/// Generic recursive split emission class.
template <typename Derived> class OpSplitter {
protected:
/// The builder used to form new instructions.
IRBuilderTy &IRB;
/// The indices to be used with insertvalue or extractvalue to select the
/// appropriate value within the aggregate.
SmallVector<unsigned, 4> Indices;
/// The indices to a GEP instruction which will move Ptr to the correct slot
/// within the aggregate.
SmallVector<Value *, 4> GEPIndices;
/// The base pointer of the original op, used as a base for GEPing the
/// split operations.
Value *Ptr;
/// The base pointee type being GEPed into.
Type *BaseTy;
/// Known alignment of the base pointer.
Align BaseAlign;
/// Used to calculate the offset of each component so we can correctly
/// deduce alignments.
const DataLayout &DL;
/// Initialize the splitter with an insertion point, Ptr and start with a
/// single zero GEP index.
OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
: IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
BaseAlign(BaseAlign), DL(DL) {
IRB.SetInsertPoint(InsertionPoint);
}
public:
/// Generic recursive split emission routine.
///
/// This method recursively splits an aggregate op (load or store) into
/// scalar or vector ops. It splits recursively until it hits a single value
/// and emits that single value operation via the template argument.
///
/// The logic of this routine relies on GEPs and insertvalue and
/// extractvalue all operating with the same fundamental index list, merely
/// formatted differently (GEPs need actual values).
///
/// \param Ty The type being split recursively into smaller ops.
/// \param Agg The aggregate value being built up or stored, depending on
/// whether this is splitting a load or a store respectively.
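///
/// For example (illustrative): splitting a load of {i32, {float, i8}}
/// reaches leaves with Indices = {0}, {1,0}, and {1,1}; the {1,0} leaf is
/// addressed via a GEP with indices (0, 1, 0) and merged back into the
/// aggregate with insertvalue %agg, %leaf, 1, 0.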
void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
if (Ty->isSingleValueType()) {
unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
return static_cast<Derived *>(this)->emitFunc(
Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
}
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
unsigned OldSize = Indices.size();
(void)OldSize;
for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
++Idx) {
assert(Indices.size() == OldSize && "Did not return to the old size");
Indices.push_back(Idx);
GEPIndices.push_back(IRB.getInt32(Idx));
emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
GEPIndices.pop_back();
Indices.pop_back();
}
return;
}
if (StructType *STy = dyn_cast<StructType>(Ty)) {
unsigned OldSize = Indices.size();
(void)OldSize;
for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
++Idx) {
assert(Indices.size() == OldSize && "Did not return to the old size");
Indices.push_back(Idx);
GEPIndices.push_back(IRB.getInt32(Idx));
emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
GEPIndices.pop_back();
Indices.pop_back();
}
return;
}
llvm_unreachable("Only arrays and structs are aggregate loadable types");
}
};
struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
AAMDNodes AATags;
LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
IRBuilderTy &IRB)
: OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
IRB),
AATags(AATags) {}
/// Emit a leaf load of a single value. This is called at the leaves of the
/// recursive emission to actually load values.
void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Load the single value and insert it using the indices.
Value *GEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
LoadInst *Load =
IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Load->setAAMetadata(AATags.shift(Offset.getZExtValue()));
Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
}
};
bool visitLoadInst(LoadInst &LI) {
assert(LI.getPointerOperand() == *U);
if (!LI.isSimple() || LI.getType()->isSingleValueType())
return false;
// We have an aggregate being loaded, split it apart.
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL, IRB);
Value *V = PoisonValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
Visited.erase(&LI);
LI.replaceAllUsesWith(V);
LI.eraseFromParent();
return true;
}
struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
IRBuilderTy &IRB)
: OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL, IRB),
AATags(AATags) {}
AAMDNodes AATags;
/// Emit a leaf store of a single value. This is called at the leaves of the
/// recursive emission to actually produce stores.
void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
assert(Ty->isSingleValueType());
// Extract the single value and store it using the indices.
//
// The gep and extractvalue values are factored out of the CreateStore
// call to make the output independent of the argument evaluation order.
Value *ExtractValue =
IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
Value *InBoundsGEP =
IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
StoreInst *Store =
IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
if (AATags &&
GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
}
};
bool visitStoreInst(StoreInst &SI) {
if (!SI.isSimple() || SI.getPointerOperand() != *U)
return false;
Value *V = SI.getValueOperand();
if (V->getType()->isSingleValueType())
return false;
// We have an aggregate being stored, split it apart.
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(),
getAdjustedAlignment(&SI, 0), DL, IRB);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
Visited.erase(&SI);
SI.eraseFromParent();
return true;
}
bool visitBitCastInst(BitCastInst &BC) {
enqueueUsers(BC);
return false;
}
bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
enqueueUsers(ASC);
return false;
}
// Fold gep (select cond, ptr1, ptr2) => select cond, gep(ptr1), gep(ptr2)
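// For example (illustrative names):
//   %gep = getelementptr inbounds [4 x i32], [4 x i32]* %sel, i64 0, i64 1
// where %sel selects between %a and %b becomes
//   %a.sroa.gep = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 1
//   %b.sroa.gep = getelementptr inbounds [4 x i32], [4 x i32]* %b, i64 0, i64 1
//   %sel.sroa.sel = select i1 %c, i32* %a.sroa.gep, i32* %b.sroa.gep
// which lets the rewriter reason about each pointer independently.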
bool foldGEPSelect(GetElementPtrInst &GEPI) {
if (!GEPI.hasAllConstantIndices())
return false;
SelectInst *Sel = cast<SelectInst>(GEPI.getPointerOperand());
LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):"
<< "\n original: " << *Sel
<< "\n " << GEPI);
IRB.SetInsertPoint(&GEPI);
SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
Type *Ty = GEPI.getSourceElementType();
Value *True = Sel->getTrueValue();
Value *NTrue = IRB.CreateGEP(Ty, True, Index, True->getName() + ".sroa.gep",
IsInBounds);
Value *False = Sel->getFalseValue();
Value *NFalse = IRB.CreateGEP(Ty, False, Index,
False->getName() + ".sroa.gep", IsInBounds);
Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
Sel->getName() + ".sroa.sel");
Visited.erase(&GEPI);
GEPI.replaceAllUsesWith(NSel);
GEPI.eraseFromParent();
Instruction *NSelI = cast<Instruction>(NSel);
Visited.insert(NSelI);
enqueueUsers(*NSelI);
LLVM_DEBUG(dbgs() << "\n to: " << *NTrue
<< "\n " << *NFalse
<< "\n " << *NSel << '\n');
return true;
}
// Fold gep (phi ptr1, ptr2) => phi gep(ptr1), gep(ptr2)
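// For example (illustrative): a GEP whose pointer operand is
//   %phi = phi i32* [ %a, %bb1 ], [ %b, %bb2 ]
// is replaced by a new PHI of per-predecessor GEPs, each one emitted just
// after the instruction that defines its incoming pointer.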
bool foldGEPPhi(GetElementPtrInst &GEPI) {
if (!GEPI.hasAllConstantIndices())
return false;
PHINode *PHI = cast<PHINode>(GEPI.getPointerOperand());
if (GEPI.getParent() != PHI->getParent() ||
llvm::any_of(PHI->incoming_values(), [](Value *In) {
Instruction *I = dyn_cast<Instruction>(In);
return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
succ_empty(I->getParent()) ||
!I->getParent()->isLegalToHoistInto();
}))
return false;
LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):"
<< "\n original: " << *PHI
<< "\n " << GEPI
<< "\n to: ");
SmallVector<Value *, 4> Index(GEPI.indices());
bool IsInBounds = GEPI.isInBounds();
IRB.SetInsertPoint(GEPI.getParent()->getFirstNonPHI());
PHINode *NewPN = IRB.CreatePHI(GEPI.getType(), PHI->getNumIncomingValues(),
PHI->getName() + ".sroa.phi");
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
BasicBlock *B = PHI->getIncomingBlock(I);
Value *NewVal = nullptr;
int Idx = NewPN->getBasicBlockIndex(B);
if (Idx >= 0) {
NewVal = NewPN->getIncomingValue(Idx);
} else {
Instruction *In = cast<Instruction>(PHI->getIncomingValue(I));
IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
Type *Ty = GEPI.getSourceElementType();
NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",
IsInBounds);
}
NewPN->addIncoming(NewVal, B);
}
Visited.erase(&GEPI);
GEPI.replaceAllUsesWith(NewPN);
GEPI.eraseFromParent();
Visited.insert(NewPN);
enqueueUsers(*NewPN);
LLVM_DEBUG(for (Value *In : NewPN->incoming_values())
dbgs() << "\n " << *In;
dbgs() << "\n " << *NewPN << '\n');
return true;
}
bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
if (isa<SelectInst>(GEPI.getPointerOperand()) &&
foldGEPSelect(GEPI))
return true;
if (isa<PHINode>(GEPI.getPointerOperand()) &&
foldGEPPhi(GEPI))
return true;
enqueueUsers(GEPI);
return false;
}
bool visitPHINode(PHINode &PN) {
enqueueUsers(PN);
return false;
}
bool visitSelectInst(SelectInst &SI) {
enqueueUsers(SI);
return false;
}
};
} // end anonymous namespace
/// Strip aggregate type wrapping.
///
/// This removes no-op aggregate types wrapping an underlying type. It will
/// strip as many layers of types as it can without changing either the type
/// size or the allocated size.
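///
/// For example (illustrative): { { i32 } } and [1 x i32] both strip down to
/// i32, while { i32, i8 } is returned unchanged because stripping it to i32
/// would shrink the allocated size.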
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
if (Ty->isSingleValueType())
return Ty;
uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize();
uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize();
Type *InnerTy;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
InnerTy = ArrTy->getElementType();
} else if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Index = SL->getElementContainingOffset(0);
InnerTy = STy->getElementType(Index);
} else {
return Ty;
}
if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() ||
TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize())
return Ty;
return stripAggregateTypeWrapping(DL, InnerTy);
}
/// Try to find a partition of the aggregate type passed in for a given
/// offset and size.
///
/// This recurses through the aggregate type and tries to compute a subtype
/// based on the offset and size. When the offset and size span a sub-section
/// of an array, it will even compute a new array type for that sub-section,
/// and the same for structs.
///
/// Note that this routine is very strict and tries to find a partition of the
/// type which produces the *exact* right offset and size. It is not forgiving
/// when the size or offset causes either end of the type-based partition to
/// be off.
/// Also, this is a best-effort routine. It is reasonable to give up and not
/// return a type if necessary.
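///
/// For example (illustrative): with Ty = { i32, [4 x i16] }, an Offset of 4
/// and Size of 4 yields [2 x i16], while an Offset of 2 yields nullptr
/// because the partition would straddle the end of the i32 element.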
static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
uint64_t Size) {
if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedSize() == Size)
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty).getFixedSize() ||
(DL.getTypeAllocSize(Ty).getFixedSize() - Offset) < Size)
return nullptr;
if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
Type *ElementTy;
uint64_t TyNumElements;
if (auto *AT = dyn_cast<ArrayType>(Ty)) {
ElementTy = AT->getElementType();
TyNumElements = AT->getNumElements();
} else {
// FIXME: This isn't right for vectors with non-byte-sized or
// non-power-of-two sized elements.
auto *VT = cast<FixedVectorType>(Ty);
ElementTy = VT->getElementType();
TyNumElements = VT->getNumElements();
}
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
uint64_t NumSkippedElements = Offset / ElementSize;
if (NumSkippedElements >= TyNumElements)
return nullptr;
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
if (Offset > 0 || Size < ElementSize) {
// Bail if the partition ends in a different array element.
if ((Offset + Size) > ElementSize)
return nullptr;
// Recurse through the element type trying to peel off offset bytes.
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
return stripAggregateTypeWrapping(DL, ElementTy);
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
if (Offset >= ElementSize)
return nullptr; // The offset points into alignment padding.
// See if any partition must be contained by the element.
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
if (Size == ElementSize)
return stripAggregateTypeWrapping(DL, ElementTy);
StructType::element_iterator EI = STy->element_begin() + Index,
EE = STy->element_end();
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
return nullptr; // Within a single element and its padding.
// Don't try to form "natural" types if the elements don't line up with the
// expected size.
// FIXME: We could potentially recurse down through the last element in the
// sub-struct to find a natural end point.
if (SL->getElementOffset(EndIndex) != EndOffset)
return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
}
// Try to build up a sub-structure.
StructType *SubTy =
StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
return nullptr; // The sub-struct doesn't have quite the size needed.
return SubTy;
}
/// Pre-split loads and stores to simplify rewriting.
///
/// We want to break up the splittable load+store pairs as much as
/// possible. This is important to do as a preprocessing step, as once we
/// start rewriting the accesses to partitions of the alloca we lose the
/// necessary information to correctly split apart paired loads and stores
/// which both point into this alloca. The case to consider is something like
/// the following:
///
/// %a = alloca [12 x i8]
/// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0
/// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4
/// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8
/// %iptr1 = bitcast i8* %gep1 to i64*
/// %iptr2 = bitcast i8* %gep2 to i64*
/// %fptr1 = bitcast i8* %gep1 to float*
/// %fptr2 = bitcast i8* %gep2 to float*
/// %fptr3 = bitcast i8* %gep3 to float*
/// store float 0.0, float* %fptr1
/// store float 1.0, float* %fptr2
/// %v = load i64* %iptr1
/// store i64 %v, i64* %iptr2
/// %f1 = load float* %fptr2
/// %f2 = load float* %fptr3
///
/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
/// promote everything so we recover the 2 SSA values that should have been
/// there all along.
///
/// \returns true if any changes are made.
bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
// the order they first appear during the partition scan. These give stable
// iteration order and a basis for tracking which loads and stores we
// actually split.
SmallVector<LoadInst *, 4> Loads;
SmallVector<StoreInst *, 4> Stores;
// We need to accumulate the splits required of each load or store where we
// can find them via a direct lookup. This is important to cross-check loads
// and stores against each other. We also track the slice so that we can kill
// all the slices that end up split.
struct SplitOffsets {
Slice *S;
std::vector<uint64_t> Splits;
};
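// For example (illustrative): a splittable 12-byte slice starting at offset
// 0 that spans partitions [0,4), [4,8), and [8,12) records Splits = {4, 8};
// the final end offset is implied by the slice's own size.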
SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
// Track loads out of this alloca which cannot, for any reason, be pre-split.
// This is important as we also cannot pre-split stores of those loads!
// FIXME: This is all pretty gross. It means that we can be more aggressive
// in pre-splitting when the load feeding the store happens to come from
// a separate alloca. Put another way, the effectiveness of SROA would be
// decreased by a frontend which just concatenated all of its local allocas
// into one big flat alloca. But defeating such patterns is exactly the job
// SROA is tasked with! Sadly, to not have this discrepancy we would have
// change store pre-splitting to actually force pre-splitting of the load
// that feeds it *and all stores*. That makes pre-splitting much harder, but
// maybe it would make it more principled?
SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
for (auto &P : AS.partitions()) {
for (Slice &S : P) {
Instruction *I = cast<Instruction>(S.getUse()->getUser());
if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
// If this is a load we have to track that it can't participate in any
// pre-splitting. If this is a store of a load we have to track that
// that load also can't participate in any pre-splitting.
if (auto *LI = dyn_cast<LoadInst>(I))
UnsplittableLoads.insert(LI);
else if (auto *SI = dyn_cast<StoreInst>(I))
if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
UnsplittableLoads.insert(LI);
continue;
}
assert(P.endOffset() > S.beginOffset() &&
"Empty or backwards partition!");
// Determine if this is a pre-splittable slice.
if (auto *LI = dyn_cast<LoadInst>(I)) {
assert(!LI->isVolatile() && "Cannot split volatile loads!");
// The load must be used exclusively to store into other pointers for
// us to be able to arbitrarily pre-split it. The stores must also be
// simple to avoid changing semantics.
auto IsLoadSimplyStored = [](LoadInst *LI) {
for (User *LU : LI->users()) {
auto *SI = dyn_cast<StoreInst>(LU);
if (!SI || !SI->isSimple())
return false;
}
return true;
};
if (!IsLoadSimplyStored(LI)) {
UnsplittableLoads.insert(LI);
continue;
}
Loads.push_back(LI);
} else if (auto *SI = dyn_cast<StoreInst>(I)) {
if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
// Skip stores *of* pointers. FIXME: This shouldn't even be possible!
continue;
auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
if (!StoredLoad || !StoredLoad->isSimple())
continue;
assert(!SI->isVolatile() && "Cannot split volatile stores!");
Stores.push_back(SI);
} else {
// Other uses cannot be pre-split.
continue;
}
// Record the initial split.
LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
auto &Offsets = SplitOffsetsMap[I];
assert(Offsets.Splits.empty() &&
"Should not have splits the first time we see an instruction!");
Offsets.S = &S;
Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
}
// Now scan the already split slices, and add a split for any of them which
// we're going to pre-split.
for (Slice *S : P.splitSliceTails()) {
auto SplitOffsetsMapI =
SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
if (SplitOffsetsMapI == SplitOffsetsMap.end())
continue;
auto &Offsets = SplitOffsetsMapI->second;
assert(Offsets.S == S && "Found a mismatched slice!");
assert(!Offsets.Splits.empty() &&
"Cannot have an empty set of splits on the second partition!");
assert(Offsets.Splits.back() ==
P.beginOffset() - Offsets.S->beginOffset() &&
"Previous split does not end where this one begins!");
// Record each split. The last partition's end isn't needed as the size
// of the slice dictates that.
if (S->endOffset() > P.endOffset())
Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
}
}
// We may have split loads where some of their stores are split stores. For
// such loads and stores, we can only pre-split them if their splits exactly
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
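// For example (illustrative): a load split at relative offsets {4, 8} whose
// store is split at {2, 6, 10} has no matching decomposition, so both are
// dropped from the candidate lists.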
llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
// Lookup the load we are storing in our map of split
// offsets.
auto *LI = cast<LoadInst>(SI->getValueOperand());
// If it was completely unsplittable, then we're done,
// and this store can't be pre-split.
if (UnsplittableLoads.count(LI))
return true;
auto LoadOffsetsI = SplitOffsetsMap.find(LI);
if (LoadOffsetsI == SplitOffsetsMap.end())
return false; // Unrelated loads are definitely safe.
auto &LoadOffsets = LoadOffsetsI->second;
// Now lookup the store's offsets.
auto &StoreOffsets = SplitOffsetsMap[SI];
// If the relative offsets of each split in the load and
// store match exactly, then we can split them and we
// don't need to remove them here.
if (LoadOffsets.Splits == StoreOffsets.Splits)
return false;
LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
<< " " << *LI << "\n"
<< " " << *SI << "\n");
// We've found a store and load that we need to split
// with mismatched relative splits. Just give up on them
// and remove both instructions from our list of
// candidates.
UnsplittableLoads.insert(LI);
return true;
});
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
auto *LI = cast<LoadInst>(SI->getValueOperand());
return UnsplittableLoads.count(LI);
});
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
return UnsplittableLoads.count(LI);
});
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
if (Loads.empty() && Stores.empty())
return false;
// From here on, we can't fail and will be building new accesses, so rig up
// an IR builder.
IRBuilderTy IRB(&AI);
// Collect the new slices which we will merge into the alloca slices.
SmallVector<Slice, 4> NewSlices;
// Track any allocas we end up splitting loads and stores for so we iterate
// on them.
SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
// At this point, we have collected all of the loads and stores we can
// pre-split, and the specific splits needed for them. We actually do the
// splitting in a specific order so that we can handle the case where one of
// the loads is the value operand of one of the stores.
//
// First, we rewrite all of the split loads, and just accumulate each split
// load in a parallel structure. We also build the slices for them and append
// them to the alloca slices.
SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
std::vector<LoadInst *> SplitLoads;
const DataLayout &DL = AI.getModule()->getDataLayout();
for (LoadInst *LI : Loads) {
SplitLoads.clear();
auto &Offsets = SplitOffsetsMap[LI];
unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
"Load must have type size equal to store size");
assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
"Load must be >= slice size");
uint64_t BaseOffset = Offsets.S->beginOffset();
assert(BaseOffset + SliceSize > BaseOffset &&
"Cannot represent alloca access size using 64-bit integers!");
Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
IRB.SetInsertPoint(LI);
LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
auto AS = LI->getPointerAddressSpace();
auto *PartPtrTy = PartTy->getPointerTo(AS);
LoadInst *PLoad = IRB.CreateAlignedLoad(
PartTy,
getAdjustedPtr(IRB, DL, BasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset),
/*IsVolatile*/ false, LI->getName());
PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
SplitLoads.push_back(PLoad);
// Now build a new slice for the alloca.
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
/*IsSplittable*/ false));
LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset()
<< "): " << *PLoad << "\n");
// See if we've handled all the splits.
if (Idx >= Size)
break;
// Setup the next partition.
PartOffset = Offsets.Splits[Idx];
++Idx;
PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
}
// Now that we have the split loads, do the slow walk over all uses of the
// load and rewrite them as split stores, or save the split loads to use
// below if the store is going to be split there anyways.
bool DeferredStores = false;
for (User *LU : LI->users()) {
StoreInst *SI = cast<StoreInst>(LU);
if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
DeferredStores = true;
LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
<< "\n");
continue;
}
Value *StoreBasePtr = SI->getPointerOperand();
IRB.SetInsertPoint(SI);
LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
LoadInst *PLoad = SplitLoads[Idx];
uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
auto *PartPtrTy =
PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad,
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset),
/*IsVolatile*/ false);
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
// We want to immediately iterate on any allocas impacted by splitting
// this store, and we have to track any promotable alloca (indicated by
// a direct store) as needing to be resplit because it is no longer
// promotable.
if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
ResplitPromotableAllocas.insert(OtherAI);
Worklist.insert(OtherAI);
} else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
StoreBasePtr->stripInBoundsOffsets())) {
Worklist.insert(OtherAI);
}
// Mark the original store as dead.
DeadInsts.push_back(SI);
}
// Save the split loads if there are deferred stores among the users.
if (DeferredStores)
SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
// Mark the original load as dead and kill the original slice.
DeadInsts.push_back(LI);
Offsets.S->kill();
}
// Second, we rewrite all of the split stores. At this point, we know that
// all loads from this alloca have been split already. For stores of such
// loads, we can simply look up the pre-existing split loads. For stores of
// other loads, we split those loads first and then write split stores of
// them.
for (StoreInst *SI : Stores) {
auto *LI = cast<LoadInst>(SI->getValueOperand());
IntegerType *Ty = cast<IntegerType>(LI->getType());
assert(Ty->getBitWidth() % 8 == 0);
uint64_t StoreSize = Ty->getBitWidth() / 8;
assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
auto &Offsets = SplitOffsetsMap[SI];
assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
"Slice size should always match load size exactly!");
uint64_t BaseOffset = Offsets.S->beginOffset();
assert(BaseOffset + StoreSize > BaseOffset &&
"Cannot represent alloca access size using 64-bit integers!");
Value *LoadBasePtr = LI->getPointerOperand();
Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
// Check whether we have an already split load.
auto SplitLoadsMapI = SplitLoadsMap.find(LI);
std::vector<LoadInst *> *SplitLoads = nullptr;
if (SplitLoadsMapI != SplitLoadsMap.end()) {
SplitLoads = &SplitLoadsMapI->second;
assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
"Too few split loads for the number of splits in the store!");
} else {
LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
}
uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
// Either lookup a split load or create one.
LoadInst *PLoad;
if (SplitLoads) {
PLoad = (*SplitLoads)[Idx];
} else {
IRB.SetInsertPoint(LI);
auto AS = LI->getPointerAddressSpace();
PLoad = IRB.CreateAlignedLoad(
PartTy,
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset),
/*IsVolatile*/ false, LI->getName());
PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
}
// And store this partition.
IRB.SetInsertPoint(SI);
auto AS = SI->getPointerAddressSpace();
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad,
getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getIndexSizeInBits(AS), PartOffset),
StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset),
/*IsVolatile*/ false);
PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
LLVMContext::MD_access_group});
// Now build a new slice for the alloca.
NewSlices.push_back(
Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
&PStore->getOperandUse(PStore->getPointerOperandIndex()),
/*IsSplittable*/ false));
LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
<< ", " << NewSlices.back().endOffset()
<< "): " << *PStore << "\n");
if (!SplitLoads) {
LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
}
// See if we've finished all the splits.
if (Idx >= Size)
break;
// Setup the next partition.
PartOffset = Offsets.Splits[Idx];
++Idx;
PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
}
// We want to immediately iterate on any allocas impacted by splitting
// this load, which is only relevant if it isn't a load of this alloca and
// thus we didn't already split the loads above. We also have to keep track
// of any promotable allocas we split loads on as they can no longer be
// promoted.
if (!SplitLoads) {
if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
assert(OtherAI != &AI && "We can't re-split our own alloca!");
ResplitPromotableAllocas.insert(OtherAI);
Worklist.insert(OtherAI);
} else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
LoadBasePtr->stripInBoundsOffsets())) {
assert(OtherAI != &AI && "We can't re-split our own alloca!");
Worklist.insert(OtherAI);
}
}
// Mark the original store as dead now that we've split it up and kill its
// slice. Note that we leave the original load in place unless this store
// was its only use. It may in turn be split up if it is an alloca load
// for some other alloca, but it may be a normal load. This may introduce
// redundant loads, but where those can be merged the rest of the optimizer
// should handle the merging, and this uncovers SSA splits which is more
// important. In practice, the original loads will almost always be fully
// split and removed eventually, and the splits will be merged by any
// trivial CSE, including instcombine.
if (LI->hasOneUse()) {
assert(*LI->user_begin() == SI && "Single use isn't this store!");
DeadInsts.push_back(LI);
}
DeadInsts.push_back(SI);
Offsets.S->kill();
}
// Remove the killed slices that have been pre-split.
llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
AS.insert(NewSlices);
LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
#ifndef NDEBUG
for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
LLVM_DEBUG(AS.print(dbgs(), I, " "));
#endif
// Finally, don't try to promote any allocas that now require re-splitting.
// They have already been added to the worklist above.
llvm::erase_if(PromotableAllocas, [&](AllocaInst *AI) {
return ResplitPromotableAllocas.count(AI);
});
return true;
}
/// Rewrite an alloca partition's users.
///
/// This routine drives both of the rewriting goals of the SROA pass. It tries
/// to rewrite uses of an alloca partition to be conducive for SSA value
/// promotion. If the partition needs a new, more refined alloca, this will
/// build that new alloca, preserving as much type information as possible, and
/// rewrite the uses of the old alloca to point at the new one and have the
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
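// For example (illustrative): an 8-byte partition whose uses all operate on
// i64 gets SliceTy = i64, while a 3-byte partition with no common type
// typically falls back to [3 x i8] since i24 is rarely a legal integer.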
Type *SliceTy = nullptr;
const DataLayout &DL = AI.getModule()->getDataLayout();
std::pair<Type *, IntegerType *> CommonUseTy =
findCommonType(P.begin(), P.end(), P.endOffset());
// Do all uses operate on the same type?
if (CommonUseTy.first)
if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
SliceTy = CommonUseTy.first;
// If not, can we find an appropriate subtype in the original allocated type?
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;
// If still not, can we use the largest bitwidth integer type used?
if (!SliceTy && CommonUseTy.second)
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
SliceTy = CommonUseTy.second;
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL.isLegalInteger(P.size() * 8))
SliceTy = Type::getIntNTy(*C, P.size() * 8);
if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size());
bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
VectorType *VecTy =
IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
if (VecTy)
SliceTy = VecTy;
// Check for the case where we're going to rewrite to a new alloca of the
// exact same type as the original, and with the same access offsets. In that
// case, re-use the existing alloca, but still run through the rewriter to
// perform phi and select speculation.
// P.beginOffset() can be non-zero even with the same type in a case with
// out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
AllocaInst *NewAI;
if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
NewAI = &AI;
// FIXME: We should be able to bail at this point with "nothing changed".
// FIXME: We might want to defer PHI speculation until after here.
// FIXME: return nullptr;
} else {
// Make sure the alignment is compatible with P.beginOffset().
const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
// If we will get at least this much alignment from the type alone, leave
// the alloca's alignment unconstrained.
const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
NewAI = new AllocaInst(
SliceTy, AI.getType()->getAddressSpace(), nullptr,
IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
// Copy the old AI debug location over to the new one.
NewAI->setDebugLoc(AI.getDebugLoc());
++NumNewAllocas;
}
LLVM_DEBUG(dbgs() << "Rewriting alloca partition "
<< "[" << P.beginOffset() << "," << P.endOffset()
<< ") to: " << *NewAI << "\n");
// Track the high watermark on the worklist as it is only relevant for
// promoted allocas. We will reset it to this point if the alloca is not in
// fact scheduled for promotion.
unsigned PPWOldSize = PostPromotionWorklist.size();
unsigned NumUses = 0;
SmallSetVector<PHINode *, 8> PHIUsers;
SmallSetVector<SelectInst *, 8> SelectUsers;
AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
P.endOffset(), IsIntegerPromotable, VecTy,
PHIUsers, SelectUsers);
bool Promotable = true;
for (Slice *S : P.splitSliceTails()) {
Promotable &= Rewriter.visit(S);
++NumUses;
}
for (Slice &S : P) {
Promotable &= Rewriter.visit(&S);
++NumUses;
}
NumAllocaPartitionUses += NumUses;
MaxUsesPerAllocaPartition.updateMax(NumUses);
// Now that we've processed all the slices in the new partition, check if any
// PHIs or Selects would block promotion.
for (PHINode *PHI : PHIUsers)
if (!isSafePHIToSpeculate(*PHI)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
for (SelectInst *Sel : SelectUsers)
if (!isSafeSelectToSpeculate(*Sel)) {
Promotable = false;
PHIUsers.clear();
SelectUsers.clear();
break;
}
if (Promotable) {
for (Use *U : AS.getDeadUsesIfPromotable()) {
auto *OldInst = dyn_cast<Instruction>(U->get());
Value::dropDroppableUse(*U);
if (OldInst)
if (isInstructionTriviallyDead(OldInst))
DeadInsts.push_back(OldInst);
}
if (PHIUsers.empty() && SelectUsers.empty()) {
// Promote the alloca.
PromotableAllocas.push_back(NewAI);
} else {
// If we have either PHIs or Selects to speculate, add them to those
// worklists and re-queue the new alloca so that we promote it on the
// next iteration.
for (PHINode *PHIUser : PHIUsers)
SpeculatablePHIs.insert(PHIUser);
for (SelectInst *SelectUser : SelectUsers)
SpeculatableSelects.insert(SelectUser);
Worklist.insert(NewAI);
}
} else {
// Drop any post-promotion work items if promotion didn't happen.
while (PostPromotionWorklist.size() > PPWOldSize)
PostPromotionWorklist.pop_back();
// We couldn't promote and we didn't create a new partition, nothing
// happened.
if (NewAI == &AI)
return nullptr;
// If we can't promote the alloca, iterate on it to check for new
// refinements exposed by splitting the current alloca. Don't iterate on an
// alloca which didn't actually change and didn't get promoted.
Worklist.insert(NewAI);
}
return NewAI;
}
/// Walks the slices of an alloca and forms partitions based on them,
/// rewriting each of their uses.
bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
return false;
unsigned NumPartitions = 0;
bool Changed = false;
const DataLayout &DL = AI.getModule()->getDataLayout();
// First try to pre-split loads and stores.
Changed |= presplitLoadsAndStores(AI, AS);
// Now that we have identified any pre-splitting opportunities,
// mark loads and stores unsplittable except for the following case.
// We leave a slice splittable if all other slices are disjoint or fully
// included in the slice, such as whole-alloca loads and stores.
// If we fail to split these during pre-splitting, we want to force them
// to be rewritten into a partition.
bool IsSorted = true;
uint64_t AllocaSize =
DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize();
const uint64_t MaxBitVectorSize = 1024;
if (AllocaSize <= MaxBitVectorSize) {
// If a byte boundary is included in any load or store, a slice starting or
// ending at the boundary is not splittable.
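// For example (illustrative): in an 8-byte alloca, an i32 load covering
// [2, 6) clears offsets 3, 4, and 5, so a splittable slice ending at offset
// 4 would be made unsplittable below.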
SmallBitVector SplittableOffset(AllocaSize + 1, true);
for (Slice &S : AS)
for (unsigned O = S.beginOffset() + 1;
O < S.endOffset() && O < AllocaSize; O++)
SplittableOffset.reset(O);
for (Slice &S : AS) {
if (!S.isSplittable())
continue;
if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
(S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
S.makeUnsplittable();
IsSorted = false;
}
}
} else {
// We only allow whole-alloca splittable loads and stores
// for a large alloca to avoid creating too large BitVector.
for (Slice &S : AS) {
if (!S.isSplittable())
continue;
if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
continue;
if (isa<LoadInst>(S.getUse()->getUser()) ||
isa<StoreInst>(S.getUse()->getUser())) {
S.makeUnsplittable();
IsSorted = false;
}
}
}
if (!IsSorted)
llvm::sort(AS);
/// Describes the allocas introduced by rewritePartition in order to migrate
/// the debug info.
struct Fragment {
AllocaInst *Alloca;
uint64_t Offset;
uint64_t Size;
Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
: Alloca(AI), Offset(O), Size(S) {}
};
SmallVector<Fragment, 4> Fragments;
// Rewrite each partition.
for (auto &P : AS.partitions()) {
if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
Changed = true;
if (NewAI != &AI) {
uint64_t SizeOfByte = 8;
uint64_t AllocaSize =
DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedSize();
// Don't include any padding.
uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
}
}
++NumPartitions;
}
NumAllocaPartitions += NumPartitions;
MaxPartitionsPerAlloca.updateMax(NumPartitions);
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
auto *Expr = DbgDeclare->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize =
DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedSize();
for (auto Fragment : Fragments) {
// Create a fragment expression describing the new partition or reuse AI's
// expression if there is only one partition.
auto *FragmentExpr = Expr;
if (Fragment.Size < AllocaSize || Expr->isFragment()) {
// If this alloca is already a scalar replacement of a larger aggregate,
// Fragment.Offset describes the offset inside the scalar.
auto ExprFragment = Expr->getFragmentInfo();
uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
uint64_t Start = Offset + Fragment.Offset;
uint64_t Size = Fragment.Size;
if (ExprFragment) {
uint64_t AbsEnd =
ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
if (Start >= AbsEnd)
// No need to describe a SROAed padding.
continue;
Size = std::min(Size, AbsEnd - Start);
}
// The new, smaller fragment is stenciled out from the old fragment.
if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
assert(Start >= OrigFragment->OffsetInBits &&
"new fragment is outside of original fragment");
Start -= OrigFragment->OffsetInBits;
}
// The alloca may be larger than the variable.
auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
if (VarSize) {
if (Size > *VarSize)
Size = *VarSize;
if (Size == 0 || Start + Size > *VarSize)
continue;
}
// Avoid creating a fragment expression that covers the entire variable.
if (!VarSize || *VarSize != Size) {
if (auto E =
DIExpression::createFragmentExpression(Expr, Start, Size))
FragmentExpr = *E;
else
continue;
}
}
// Remove any existing intrinsics on the new alloca describing
// the variable fragment.
for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
const DbgVariableIntrinsic *RHS) {
return LHS->getVariable() == RHS->getVariable() &&
LHS->getDebugLoc()->getInlinedAt() ==
RHS->getDebugLoc()->getInlinedAt();
};
if (SameVariableFragment(OldDII, DbgDeclare))
OldDII->eraseFromParent();
}
DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
DbgDeclare->getDebugLoc(), &AI);
}
}
return Changed;
}
/// Clobber a use with poison, deleting the used value if it becomes dead.
void SROAPass::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with a poison value.
U = PoisonValue::get(OldV->getType());
// Check for this making an instruction dead. We have to garbage collect
// all the dead instructions to ensure the uses of any alloca end up being
// minimal.
if (Instruction *OldI = dyn_cast<Instruction>(OldV))
if (isInstructionTriviallyDead(OldI)) {
DeadInsts.push_back(OldI);
}
}
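// For instance (editorial sketch): if the dead user is
//   %g = getelementptr i8, ptr %a, i64 4
//   %dead = load i32, ptr %g
// clobbering the load's pointer operand rewrites it to use poison, and %g,
// having lost its only use, becomes trivially dead and is queued as well.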
/// Analyze an alloca for SROA.
///
/// This analyzes the alloca to ensure we can reason about it, builds
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
bool SROAPass::runOnAlloca(AllocaInst &AI) {
LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
// Special case dead allocas, as they're trivial.
if (AI.use_empty()) {
AI.eraseFromParent();
return true;
}
const DataLayout &DL = AI.getModule()->getDataLayout();
// Skip alloca forms that this analysis can't handle.
auto *AT = AI.getAllocatedType();
if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) ||
DL.getTypeAllocSize(AT).getFixedSize() == 0)
return false;
bool Changed = false;
// First, split any FCA loads and stores touching this alloca to promote
// better splitting and promotion opportunities.
IRBuilderTy IRB(&AI);
AggLoadStoreRewriter AggRewriter(DL, IRB);
Changed |= AggRewriter.rewrite(AI);
// Build the slices using a recursive instruction-visiting builder.
AllocaSlices AS(DL, AI);
LLVM_DEBUG(AS.print(dbgs()));
if (AS.isEscaped())
return Changed;
// Delete all the dead users of this alloca before splitting and rewriting it.
for (Instruction *DeadUser : AS.getDeadUsers()) {
// Free up everything used by this instruction.
for (Use &DeadOp : DeadUser->operands())
clobberUse(DeadOp);
// Now replace the uses of this instruction.
DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
// And mark it for deletion.
DeadInsts.push_back(DeadUser);
Changed = true;
}
for (Use *DeadOp : AS.getDeadOperands()) {
clobberUse(*DeadOp);
Changed = true;
}
// No slices to split. Leave the dead alloca for a later pass to clean up.
if (AS.begin() == AS.end())
return Changed;
Changed |= splitAlloca(AI, AS);
LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
while (!SpeculatablePHIs.empty())
speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
LLVM_DEBUG(dbgs() << " Speculating Selects\n");
while (!SpeculatableSelects.empty())
speculateSelectInstLoads(IRB, *SpeculatableSelects.pop_back_val());
return Changed;
}
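// End-to-end sketch (editorial, hypothetical IR): for
//   %a = alloca { i32, i32 }
//   store i32 1, ptr %a
//   %v = load i32, ptr %a
// the aggregate rewriter has nothing to split, slice analysis yields a
// single [0,4) partition, rewritePartition emits a promotable i32 alloca,
// and the later mem2reg run replaces %v with the stored constant 1.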
/// Delete the dead instructions accumulated in this run.
///
/// Recursively deletes the dead instructions we've accumulated. This is done
/// at the very end to maximize locality of the recursive delete and to
/// minimize the problems of invalidated instruction pointers as such pointers
/// are used heavily in the intermediate stages of the algorithm.
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
bool SROAPass::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
if (!I) continue;
LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
// If the instruction is an alloca, find the possible dbg.declare connected
// to it, and remove it too. We must do this before calling RAUW or we will
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
OldDII->eraseFromParent();
}
I->replaceAllUsesWith(UndefValue::get(I->getType()));
for (Use &Operand : I->operands())
if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.push_back(U);
}
++NumDeleted;
I->eraseFromParent();
Changed = true;
}
return Changed;
}
/// Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// This function returns whether any promotion occurred.
bool SROAPass::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
NumPromoted += PromotableAllocas.size();
LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
PromoteMemToReg(PromotableAllocas, *DT, AC);
PromotableAllocas.clear();
return true;
}
PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
AssumptionCache &RunAC) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DT = &RunDT;
AC = &RunAC;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
if (isa<ScalableVectorType>(AI->getAllocatedType())) {
if (isAllocaPromotable(AI))
PromotableAllocas.push_back(AI);
} else {
Worklist.insert(AI);
}
}
}
bool Changed = false;
// A set of deleted alloca instruction pointers which should be removed from
// the list of promotable allocas.
SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
do {
while (!Worklist.empty()) {
Changed |= runOnAlloca(*Worklist.pop_back_val());
Changed |= deleteDeadInstructions(DeletedAllocas);
// Remove the deleted allocas from various lists so that we don't try to
// continue processing them.
if (!DeletedAllocas.empty()) {
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
llvm::erase_if(PromotableAllocas, IsInSet);
DeletedAllocas.clear();
}
}
Changed |= promoteAllocas(F);
Worklist = PostPromotionWorklist;
PostPromotionWorklist.clear();
} while (!Worklist.empty());
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return PA;
}
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
///
/// This is in the llvm namespace purely to allow it to be a friend of the \c
/// SROA pass.
class llvm::sroa::SROALegacyPass : public FunctionPass {
/// The SROA implementation.
SROAPass Impl;
public:
static char ID;
SROALegacyPass() : FunctionPass(ID) {
initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
auto PA = Impl.runImpl(
F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
return !PA.areAllPreserved();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
StringRef getPassName() const override { return "SROA"; }
};
char SROALegacyPass::ID = 0;
FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); }
INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
"Scalar Replacement Of Aggregates", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
false, false)
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 42be67f3cfc0..264da2187754 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -1,590 +1,590 @@
#include "llvm/Transforms/Utils/VNCoercion.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "vncoerce"
namespace llvm {
namespace VNCoercion {
static bool isFirstClassAggregateOrScalableType(Type *Ty) {
return Ty->isStructTy() || Ty->isArrayTy() || isa<ScalableVectorType>(Ty);
}
/// Return true if coerceAvailableValueToLoadType will succeed.
bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
const DataLayout &DL) {
Type *StoredTy = StoredVal->getType();
if (StoredTy == LoadTy)
return true;
// If the loaded/stored value is a first-class array, struct, or scalable type,
// don't try to transform them. We need to be able to bitcast to integer.
if (isFirstClassAggregateOrScalableType(LoadTy) ||
isFirstClassAggregateOrScalableType(StoredTy))
return false;
uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedSize();
// The store size must be byte-aligned to support future type casts.
if (llvm::alignTo(StoreSize, 8) != StoreSize)
return false;
// The store has to be at least as big as the load.
if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
// Don't coerce non-integral pointers to integers or vice versa.
if (StoredNI != LoadNI) {
// As a special case, allow coercion of memset used to initialize
// an array with null. Despite non-integral pointers not generally having a
// specific bit pattern, we do assume null is zero.
if (auto *CI = dyn_cast<Constant>(StoredVal))
return CI->isNullValue();
return false;
} else if (StoredNI && LoadNI &&
StoredTy->getPointerAddressSpace() !=
LoadTy->getPointerAddressSpace()) {
return false;
}
// The implementation below uses inttoptr for vectors of unequal size; we
// can't allow this for non integral pointers. We could teach it to extract
// exact subvectors if desired.
if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
return false;
return true;
}
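// Examples of the rules above (editorial, hypothetical types):
//   store i64,       load i32   -> coercible: byte-sized store >= load
//   store <4 x i8>,  load i32   -> coercible: equal 32-bit sizes
//   store i32,       load i64   -> rejected: store smaller than load
//   store {i32,i32}, any load   -> rejected: first-class aggregate
//   store i17,       load i8    -> rejected: 17 bits is not byte-aligned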
/// If we saw a store of a value to memory, and
/// then a load from a must-aliased pointer of a different type, try to coerce
/// the stored value. LoadedTy is the type of the load we want to replace.
/// Helper is the IRBuilder used to insert new instructions.
///
/// If we can't do it, return null.
Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
IRBuilderBase &Helper,
const DataLayout &DL) {
assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
"precondition violation - materialization can't fail");
if (auto *C = dyn_cast<Constant>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
uint64_t StoredValSize = DL.getTypeSizeInBits(StoredValTy).getFixedSize();
uint64_t LoadedValSize = DL.getTypeSizeInBits(LoadedTy).getFixedSize();
// If the store and reload are the same size, we can always reuse it.
if (StoredValSize == LoadedValSize) {
// Pointer to Pointer -> use bitcast.
if (StoredValTy->isPtrOrPtrVectorTy() && LoadedTy->isPtrOrPtrVectorTy()) {
StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
} else {
// Convert source pointers to integers, which can be bitcast.
if (StoredValTy->isPtrOrPtrVectorTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
}
Type *TypeToCastTo = LoadedTy;
if (TypeToCastTo->isPtrOrPtrVectorTy())
TypeToCastTo = DL.getIntPtrType(TypeToCastTo);
if (StoredValTy != TypeToCastTo)
StoredVal = Helper.CreateBitCast(StoredVal, TypeToCastTo);
// Cast to pointer if the load needs a pointer type.
if (LoadedTy->isPtrOrPtrVectorTy())
StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
}
if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
return StoredVal;
}
// If the loaded value is smaller than the available value, then we can
// extract out a piece from it. If the available value is too small, then we
// can't do anything.
assert(StoredValSize >= LoadedValSize &&
"canCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
if (StoredValTy->isPtrOrPtrVectorTy()) {
StoredValTy = DL.getIntPtrType(StoredValTy);
StoredVal = Helper.CreatePtrToInt(StoredVal, StoredValTy);
}
// Convert vectors and fp to integer, which can be manipulated.
if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoredValSize);
StoredVal = Helper.CreateBitCast(StoredVal, StoredValTy);
}
// If this is a big-endian system, we need to shift the value down to the low
// bits so that a truncate will work.
if (DL.isBigEndian()) {
uint64_t ShiftAmt = DL.getTypeStoreSizeInBits(StoredValTy).getFixedSize() -
DL.getTypeStoreSizeInBits(LoadedTy).getFixedSize();
StoredVal = Helper.CreateLShr(
StoredVal, ConstantInt::get(StoredVal->getType(), ShiftAmt));
}
// Truncate the integer to the right size now.
Type *NewIntTy = IntegerType::get(StoredValTy->getContext(), LoadedValSize);
StoredVal = Helper.CreateTruncOrBitCast(StoredVal, NewIntTy);
if (LoadedTy != NewIntTy) {
// If the result is a pointer, inttoptr.
if (LoadedTy->isPtrOrPtrVectorTy())
StoredVal = Helper.CreateIntToPtr(StoredVal, LoadedTy);
else
// Otherwise, bitcast.
StoredVal = Helper.CreateBitCast(StoredVal, LoadedTy);
}
if (auto *C = dyn_cast<Constant>(StoredVal))
StoredVal = ConstantFoldConstant(C, DL);
return StoredVal;
}
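// Worked example (editorial): coercing a stored i64 0x0102030405060708 down
// to a loaded i16. Little-endian: no shift, the truncate keeps the low
// bytes, yielding 0x0708. Big-endian: the value is first lshr'd by
// 64 - 16 = 48 bits so the truncate yields the high bytes, 0x0102.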
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering memory write (store, memset, memcpy, memmove). This
/// means that the write *may* provide bits used by the load but we can't be
/// sure because the pointers don't must-alias.
///
/// Check this case to see if there is anything more we can do before we give
/// up. This returns -1 if we have to give up, or a byte number in the stored
/// value of the piece that feeds the load.
static int analyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const DataLayout &DL) {
// If the loaded/stored value is a first class array/struct, or scalable type,
// don't try to transform them. We need to be able to bitcast to integer.
if (isFirstClassAggregateOrScalableType(LoadTy))
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
Value *StoreBase =
GetPointerBaseWithConstantOffset(WritePtr, StoreOffset, DL);
Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, DL);
if (StoreBase != LoadBase)
return -1;
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize();
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
LoadSize /= 8;
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
// (issue a smaller load and then merge the bits in) but this seems unlikely to be
// valuable.
if (StoreOffset > LoadOffset ||
StoreOffset + int64_t(StoreSize) < LoadOffset + int64_t(LoadSize))
return -1;
// Okay, we can do this transformation. Return the number of bytes into the
// store that the load is.
return LoadOffset - StoreOffset;
}
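// Example (editorial): an 8-byte store at offset 0 from the common base and
// a 4-byte load at offset 4 are fully contained ([4,8) within [0,8)), so
// this returns 4: the load reads bytes [4,8) of the stored value. A 4-byte
// load at offset 6 would cover [6,10), spill past the store, and return -1.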
/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering store.
int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI, const DataLayout &DL) {
auto *StoredVal = DepSI->getValueOperand();
// Cannot handle reading from store of first-class aggregate or scalable type.
if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
return -1;
if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
return -1;
Value *StorePtr = DepSI->getPointerOperand();
uint64_t StoreSize =
DL.getTypeSizeInBits(DepSI->getValueOperand()->getType()).getFixedSize();
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, StorePtr, StoreSize,
DL);
}
/// Looks at a memory location for a load (specified by MemLocBase, MemLocOffs,
/// and MemLocSize) and compares it against another load.
///
/// If the specified load could be safely widened to a larger integer load
/// that is 1) still efficient, 2) safe for the target, and 3) would provide
/// the specified memory location value, then this function returns the size
/// in bytes of the load width to use. If not, this returns zero.
static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
int64_t MemLocOffs,
unsigned MemLocSize,
const LoadInst *LI) {
// We can only extend simple integer loads.
if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
return 0;
// Load widening is hostile to ThreadSanitizer: it may cause false positives
// or make the reports more cryptic (access sizes are wrong).
if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return 0;
const DataLayout &DL = LI->getModule()->getDataLayout();
// Get the base of this load.
int64_t LIOffs = 0;
const Value *LIBase =
GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
// If the two pointers are not based on the same pointer, we can't tell that
// they are related.
if (LIBase != MemLocBase)
return 0;
// Okay, the two values are based on the same pointer, but returned as
// no-alias. This happens when we have things like two byte loads at "P+1"
// and "P+3". Check to see if increasing the size of the "LI" load up to its
// alignment (or the largest native integer type) will allow us to load all
// the bits required by MemLoc.
// If MemLoc is before LI, then no widening of LI will help us out.
if (MemLocOffs < LIOffs)
return 0;
// Get the alignment of the load in bytes. We assume that it is safe to load
// any legal integer up to this size without a problem. For example, if we're
// looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
// widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
// to i16.
unsigned LoadAlign = LI->getAlign().value();
int64_t MemLocEnd = MemLocOffs + MemLocSize;
// If no amount of rounding up will let MemLoc fit into LI, then bail out.
if (LIOffs + LoadAlign < MemLocEnd)
return 0;
// This is the size of the load to try. Start with the next larger power of
// two.
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
while (true) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
!DL.fitsInLegalInteger(NewLoadByteSize * 8))
return 0;
if (LIOffs + NewLoadByteSize > MemLocEnd &&
(LI->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeAddress) ||
LI->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeHWAddress)))
// We will be reading past the location accessed by the original program.
// While this is safe in a regular build, Address Safety analysis tools
// may start reporting false warnings. So, don't do widening.
return 0;
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs + NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
}
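// Example (editorial): an i8 load at offset 0 known to be 4-byte aligned,
// queried against a 2-byte location at offsets [2,4). Starting from
// NextPowerOf2(1) = 2 bytes, the loop doubles to 4, which still fits the
// alignment and now covers the location, so 4 is returned and the caller
// may widen the i8 load to an i32 load.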
/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
const DataLayout &DL) {
// Cannot handle reading from store of first-class aggregate yet.
if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
return -1;
if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
return -1;
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedSize();
int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
if (R != -1)
return R;
// If we have a load/load clobber and DepLI can be widened to cover this load,
// then we should widen it!
int64_t LoadOffs = 0;
const Value *LoadBase =
GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
unsigned Size =
getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
if (Size == 0)
return -1;
// Check non-obvious conditions enforced by MDA which we rely on for being
// able to materialize this potentially available value.
assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
}
int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *MI, const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
if (!SizeCst)
return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue() * 8;
// If this is a memset, we just need to see if the offset is valid within the
// size of the memset.
- if (MI->getIntrinsicID() == Intrinsic::memset) {
+ if (const auto *memset_inst = dyn_cast<MemSetInst>(MI)) {
if (DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
- auto *CI = dyn_cast<ConstantInt>(cast<MemSetInst>(MI)->getValue());
+ auto *CI = dyn_cast<ConstantInt>(memset_inst->getValue());
if (!CI || !CI->isZero())
return -1;
}
return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, DL);
}
// If we have a memcpy/memmove, the only case we can handle is if this is a
// copy from constant memory. In that case, we can read directly from the
// constant memory.
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
if (!Src)
return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(Src));
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return -1;
// See if the access is within the bounds of the transfer.
int Offset = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
MemSizeInBits, DL);
if (Offset == -1)
return Offset;
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
return Offset;
return -1;
}
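// Example (editorial, hypothetical names): an i32 load clobbered by
//   memcpy(%p, @cst, 16)
// where @cst is a constant global with a definitive initializer. If the
// load sits 4 bytes into the copy, the containment check returns 4, and the
// ConstantFoldLoadFromConstPtr probe confirms the bytes at that offset fold
// to a constant before 4 is returned to the caller.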
static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
Type *LoadTy, IRBuilderBase &Builder,
const DataLayout &DL) {
LLVMContext &Ctx = SrcVal->getType()->getContext();
// If two pointers are in the same address space, they have the same size,
// so we don't need to do any truncation, etc. This avoids introducing
// ptrtoint instructions for pointers that may be non-integral.
if (SrcVal->getType()->isPointerTy() && LoadTy->isPointerTy() &&
cast<PointerType>(SrcVal->getType())->getAddressSpace() ==
cast<PointerType>(LoadTy)->getAddressSpace()) {
return SrcVal;
}
uint64_t StoreSize =
(DL.getTypeSizeInBits(SrcVal->getType()).getFixedSize() + 7) / 8;
uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedSize() + 7) / 8;
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (SrcVal->getType()->isPtrOrPtrVectorTy())
SrcVal =
Builder.CreatePtrToInt(SrcVal, DL.getIntPtrType(SrcVal->getType()));
if (!SrcVal->getType()->isIntegerTy())
SrcVal =
Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize * 8));
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
if (DL.isLittleEndian())
ShiftAmt = Offset * 8;
else
ShiftAmt = (StoreSize - LoadSize - Offset) * 8;
if (ShiftAmt)
SrcVal = Builder.CreateLShr(SrcVal,
ConstantInt::get(SrcVal->getType(), ShiftAmt));
if (LoadSize != StoreSize)
SrcVal = Builder.CreateTruncOrBitCast(SrcVal,
IntegerType::get(Ctx, LoadSize * 8));
return SrcVal;
}
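// Shift-amount example (editorial): extracting a 2-byte load at byte offset
// 4 from an 8-byte store. Little-endian: ShiftAmt = 4 * 8 = 32, moving the
// wanted bytes down from bits [32,48). Big-endian: ShiftAmt =
// (8 - 2 - 4) * 8 = 16, because the first stored byte occupies the most
// significant bits.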
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering store. This means that the store provides bits used by
/// the load but the pointers don't must-alias. Check this case to see if
/// there is anything more we can do before we give up.
Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL) {
IRBuilder<> Builder(InsertPt);
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}
Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}
/// This function is called when we have a memdep query of a load that ends up
/// being a clobbering load. This means that the load *may* provide bits used
/// by the load but we can't be sure because the pointers don't must-alias.
/// Check this case to see if there is anything more we can do before we give
/// up.
Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL) {
// If Offset plus the size of LoadTy exceeds the size of SrcVal, then we
// must be widening SrcVal out to a larger load.
unsigned SrcValStoreSize =
DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
if (Offset + LoadSize > SrcValStoreSize) {
assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
// If we have a load/load clobber and DepLI can be widened to cover this
// load, then we should widen it to the next power of 2 size big enough!
unsigned NewLoadSize = Offset + LoadSize;
if (!isPowerOf2_32(NewLoadSize))
NewLoadSize = NextPowerOf2(NewLoadSize);
Value *PtrVal = SrcVal->getPointerOperand();
// Insert the new load after the old load. This ensures that subsequent
// memdep queries will find the new load. We can't easily remove the old
// load completely because it is already in the value numbering table.
IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
Type *DestPTy =
PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
NewLoad->takeName(SrcVal);
NewLoad->setAlignment(SrcVal->getAlign());
LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
// Replace uses of the original load with the wider load. On a big endian
// system, we need to shift down to get the relevant bits.
Value *RV = NewLoad;
if (DL.isBigEndian())
RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
RV = Builder.CreateTrunc(RV, SrcVal->getType());
SrcVal->replaceAllUsesWith(RV);
SrcVal = NewLoad;
}
return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
}
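// Widening example (editorial): a 2-byte load at Offset 3 of a 4-byte
// SrcVal needs bytes [3,5), which runs past SrcVal, so NewLoadSize becomes
// NextPowerOf2(5) = 8; an 8-byte load replaces SrcVal in place and the
// normal extraction path then runs against the widened value.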
Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
unsigned SrcValStoreSize =
DL.getTypeStoreSize(SrcVal->getType()).getFixedSize();
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedSize();
if (Offset + LoadSize > SrcValStoreSize)
return nullptr;
return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
}
/// This function is called when we have a
/// memdep query of a load that ends up being a clobbering mem intrinsic.
Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, Instruction *InsertPt,
const DataLayout &DL) {
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
IRBuilder<> Builder(InsertPt);
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
// memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
// independently of what the offset is.
Value *Val = MSI->getValue();
if (LoadSize != 1)
Val =
Builder.CreateZExtOrBitCast(Val, IntegerType::get(Ctx, LoadSize * 8));
Value *OneElt = Val;
// Splat the value out to the right number of bits.
for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize;) {
// If we can double the number of bytes set, do it.
if (NumBytesSet * 2 <= LoadSize) {
Value *ShVal = Builder.CreateShl(
Val, ConstantInt::get(Val->getType(), NumBytesSet * 8));
Val = Builder.CreateOr(Val, ShVal);
NumBytesSet <<= 1;
continue;
}
// Otherwise insert one byte at a time.
Value *ShVal =
Builder.CreateShl(Val, ConstantInt::get(Val->getType(), 1 * 8));
Val = Builder.CreateOr(OneElt, ShVal);
++NumBytesSet;
}
return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
DL);
}
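// Splat example (editorial): for memset(%p, 0xAB, %n) feeding an i32 load,
// Val starts as 0x000000AB; the first doubling step ORs in Val << 8 to give
// 0x0000ABAB, the second ORs in Val << 16 to give 0xABABABAB, and the
// result is coerced to the load type.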
Constant *getConstantMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
Type *LoadTy, const DataLayout &DL) {
LLVMContext &Ctx = LoadTy->getContext();
uint64_t LoadSize = DL.getTypeSizeInBits(LoadTy).getFixedSize() / 8;
// We know that this method is only called when the mem transfer fully
// provides the bits for the load.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
auto *Val = dyn_cast<ConstantInt>(MSI->getValue());
if (!Val)
return nullptr;
Val = ConstantInt::get(Ctx, APInt::getSplat(LoadSize * 8, Val->getValue()));
return ConstantFoldLoadFromConst(Val, LoadTy, DL);
}
// Otherwise, this is a memcpy/memmove from a constant global.
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
return ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset),
DL);
}
} // namespace VNCoercion
} // namespace llvm
